This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 68087f6c823 [fix](json function) Fix the slow performance of get_json_path when processing JSONB (#24631) 68087f6c823 is described below commit 68087f6c8239a1081ba90ce746d183dd04482afa Author: Chenyang Sun <csun5...@gmail.com> AuthorDate: Wed Sep 27 21:17:39 2023 +0800 [fix](json function) Fix the slow performance of get_json_path when processing JSONB (#24631) When processing JSONB, automatically convert to jsonb_extract_string --- be/src/vec/functions/function.h | 5 + be/src/vec/functions/function_json.cpp | 7 + be/src/vec/functions/function_jsonb.cpp | 28 ++ be/src/vec/functions/function_totype.h | 9 + be/src/vec/functions/simple_function_factory.h | 3 +- be/test/vec/function/function_jsonb_test.cpp | 570 +++++++++++++++++++++++++ gensrc/script/doris_builtins_functions.py | 8 + 7 files changed, 628 insertions(+), 2 deletions(-) diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index f3dc818db88..aeea5d1df04 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -65,6 +65,11 @@ struct NullPresence { bool has_null_constant = false; }; +template <typename T> +concept HasGetVariadicArgumentTypesImpl = requires(T t) { + { t.get_variadic_argument_types_impl() } -> std::same_as<DataTypes>; +}; + NullPresence get_null_presence(const Block& block, const ColumnNumbers& args); [[maybe_unused]] NullPresence get_null_presence(const ColumnsWithTypeAndName& args); diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 05c199e3540..508ade81e6e 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -273,6 +273,10 @@ struct GetJsonNumberType { using ColumnType = typename NumberType::ColumnType; using Container = typename ColumnType::Container; + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeString>(), 
std::make_shared<DataTypeString>()}; + } + static void get_json_impl(rapidjson::Value*& root, const std::string_view& json_string, const std::string_view& path_string, rapidjson::Document& document, typename NumberType::T& res, UInt8& null_map) { @@ -526,6 +530,9 @@ struct GetJsonString { res_offsets); } } + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; + } }; template <int flag> diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index 14f31c33b53..a7d43aca4f1 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -25,6 +25,7 @@ #include <string> #include <string_view> #include <tuple> +#include <type_traits> #include <utility> // IWYU pragma: no_include <opentelemetry/common/threadlocal.h> @@ -352,6 +353,13 @@ public: DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return make_nullable(std::make_shared<typename Impl::ReturnType>()); } + DataTypes get_variadic_argument_types_impl() const override { + if constexpr (vectorized::HasGetVariadicArgumentTypesImpl<Impl>) { + return Impl::get_variadic_argument_types_impl(); + } else { + return {}; + } + } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { @@ -958,11 +966,19 @@ struct JsonbExtractBool : public JsonbExtractImpl<JsonbTypeBool> { struct JsonbExtractInt : public JsonbExtractImpl<JsonbTypeInt> { static constexpr auto name = "json_extract_int"; static constexpr auto alias = "jsonb_extract_int"; + static constexpr auto name2 = "get_json_int"; + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; + } }; struct JsonbExtractBigInt : public JsonbExtractImpl<JsonbTypeInt64> { static constexpr auto name = "json_extract_bigint"; 
static constexpr auto alias = "jsonb_extract_bigint"; + static constexpr auto name2 = "get_json_bigint"; + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; + } }; struct JsonbExtractLargeInt : public JsonbExtractImpl<JsonbTypeInt128> { @@ -973,11 +989,19 @@ struct JsonbExtractLargeInt : public JsonbExtractImpl<JsonbTypeInt128> { struct JsonbExtractDouble : public JsonbExtractImpl<JsonbTypeDouble> { static constexpr auto name = "json_extract_double"; static constexpr auto alias = "jsonb_extract_double"; + static constexpr auto name2 = "get_json_double"; + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; + } }; struct JsonbExtractString : public JsonbExtractStringImpl<JsonbTypeString> { static constexpr auto name = "json_extract_string"; static constexpr auto alias = "jsonb_extract_string"; + static constexpr auto name2 = "get_json_string"; + static DataTypes get_variadic_argument_types_impl() { + return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; + } }; struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> { @@ -1310,14 +1334,18 @@ void register_function_jsonb(SimpleFunctionFactory& factory) { factory.register_alias(FunctionJsonbExtractBool::name, FunctionJsonbExtractBool::alias); factory.register_function<FunctionJsonbExtractInt>(); factory.register_alias(FunctionJsonbExtractInt::name, FunctionJsonbExtractInt::alias); + factory.register_function<FunctionJsonbExtractInt>(JsonbExtractInt::name2); factory.register_function<FunctionJsonbExtractBigInt>(); factory.register_alias(FunctionJsonbExtractBigInt::name, FunctionJsonbExtractBigInt::alias); + factory.register_function<FunctionJsonbExtractBigInt>(JsonbExtractBigInt::name2); factory.register_function<FunctionJsonbExtractLargeInt>(); factory.register_alias(FunctionJsonbExtractLargeInt::name, 
FunctionJsonbExtractLargeInt::alias); factory.register_function<FunctionJsonbExtractDouble>(); factory.register_alias(FunctionJsonbExtractDouble::name, FunctionJsonbExtractDouble::alias); + factory.register_function<FunctionJsonbExtractDouble>(JsonbExtractDouble::name2); factory.register_function<FunctionJsonbExtractString>(); factory.register_alias(FunctionJsonbExtractString::name, FunctionJsonbExtractString::alias); + factory.register_function<FunctionJsonbExtractString>(JsonbExtractString::name2); factory.register_function<FunctionJsonbExtractJsonb>(); // factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias); diff --git a/be/src/vec/functions/function_totype.h b/be/src/vec/functions/function_totype.h index f2f0934eadb..cb050ef84c1 100644 --- a/be/src/vec/functions/function_totype.h +++ b/be/src/vec/functions/function_totype.h @@ -25,6 +25,7 @@ #include "vec/columns/column_vector.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_bitmap.h" +#include "vec/data_types/data_type_jsonb.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" @@ -385,6 +386,14 @@ public: DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return make_nullable(std::make_shared<typename Impl::ReturnType>()); } + + DataTypes get_variadic_argument_types_impl() const override { + if constexpr (vectorized::HasGetVariadicArgumentTypesImpl<Impl>) { + return Impl::get_variadic_argument_types_impl(); + } else { + return {}; + } + } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { auto null_map = ColumnUInt8::create(input_rows_count, 0); diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index bdf53d9acd5..86d4d45b1ff 100644 --- 
a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -116,7 +116,6 @@ public: if (!types.empty()) { function_variadic_set.insert(name); } - std::string key_str = name; if (!types.empty()) { for (const auto& type : types) { @@ -137,7 +136,7 @@ public: template <class Function> void register_function(std::string name) { - function_creators[name] = &createDefaultFunction<Function>; + register_function(name, &createDefaultFunction<Function>); } void register_alias(const std::string& name, const std::string& alias) { diff --git a/be/test/vec/function/function_jsonb_test.cpp b/be/test/vec/function/function_jsonb_test.cpp index 3efc33a9f93..7f29e325ae3 100644 --- a/be/test/vec/function/function_jsonb_test.cpp +++ b/be/test/vec/function/function_jsonb_test.cpp @@ -1487,4 +1487,574 @@ TEST(FunctionJsonbTEST, JsonbCastFromOtherTest) { "CAST", {Notnull {TypeIndex::String}, ConstedNotnull {TypeIndex::JSONB}}, {{{STRING(R"("abcd")"), Null()}, STRING(R"("abcd")")}}); } + +TEST(FunctionJsonbTEST, GetJSONSTRINGTest) { + std::string func_name = "get_json_string"; + InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String}; + + // get json from root + DataSet data_set = { + {{Null(), STRING("$")}, Null()}, + {{STRING("null"), STRING("$")}, STRING("null")}, + {{STRING("true"), STRING("$")}, STRING("true")}, + {{STRING("false"), STRING("$")}, STRING("false")}, + {{STRING("100"), STRING("$")}, STRING("100")}, //int8 + {{STRING("10000"), STRING("$")}, STRING("10000")}, // int16 + {{STRING("1000000000"), STRING("$")}, STRING("1000000000")}, // int32 + {{STRING("1152921504606846976"), STRING("$")}, STRING("1152921504606846976")}, // int64 + {{STRING("6.18"), STRING("$")}, STRING("6.18")}, // double + {{STRING(R"("abcd")"), STRING("$")}, STRING("abcd")}, // string + {{STRING("{}"), STRING("$")}, STRING("{}")}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")}, + STRING(R"({"k1":"v31","k2":300})")}, // object + 
{{STRING("[]"), STRING("$")}, STRING("[]")}, // empty array + {{STRING("[123, 456]"), STRING("$")}, STRING("[123,456]")}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$")}, + STRING(R"(["abc","def"])")}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")}, + STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$")}, + STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array + }; + + check_function<DataTypeString, true>(func_name, input_types, data_set); + + // get json from obejct + data_set = { + {{Null(), STRING("$.k1")}, Null()}, + {{STRING("null"), STRING("$.k1")}, Null()}, + {{STRING("true"), STRING("$.k1")}, Null()}, + {{STRING("false"), STRING("$.k1")}, Null()}, + {{STRING("100"), STRING("$.k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$.k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$.k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string + {{STRING("{}"), STRING("$.k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, STRING("v31")}, // object + {{STRING("[]"), STRING("$.k1")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$.k1")}, + Null()}, // complex array + }; + + check_function<DataTypeString, true>(func_name, input_types, data_set); + + // get json from array + data_set = { + {{Null(), STRING("$[0]")}, Null()}, + {{STRING("null"), STRING("$[0]")}, Null()}, + {{STRING("true"), STRING("$[0]")}, Null()}, + {{STRING("false"), 
STRING("$[0]")}, Null()}, + {{STRING("100"), STRING("$[0]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0]")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string + {{STRING("{}"), STRING("$[0]")}, STRING("{}")}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")}, + STRING(R"({"k1":"v31","k2":300})")}, // object + {{STRING("[]"), STRING("$[0]")}, Null()}, // empty array + {{STRING("null"), STRING("$[1]")}, Null()}, + {{STRING("true"), STRING("$[1]")}, Null()}, + {{STRING("false"), STRING("$[1]")}, Null()}, + {{STRING("100"), STRING("$[1]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[1]")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[1]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string + {{STRING("{}"), STRING("$[1]")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object + {{STRING("[]"), STRING("$[1]")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$[0]")}, STRING("123")}, // int array + {{STRING("[123, 456]"), STRING("$[1]")}, STRING("456")}, // int array + {{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0]")}, STRING("abc")}, // string array + {{STRING(R"(["abc", "def"])"), STRING("$[1]")}, STRING("def")}, // string array + {{STRING(R"(["abc", "def"])"), STRING("$[2]")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")}, + STRING("null")}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")}, + STRING("true")}, // multi type array + {{STRING(R"([null, 
true, false, 100, 6.18, "abc"])"), STRING("$[2]")}, + STRING("false")}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")}, + STRING("100")}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")}, + STRING("6.18")}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")}, + STRING("abc")}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")}, + STRING(R"({"k1":"v41","k2":400})")}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")}, + STRING("1")}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")}, + STRING("a")}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")}, + STRING("3.14")}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")}, + Null()}, // complex array + }; + + check_function<DataTypeString, true>(func_name, input_types, data_set); + + // get json with path $[0].k1 + data_set = { + {{Null(), STRING("$[0].k1")}, Null()}, + {{STRING("null"), STRING("$[0].k1")}, Null()}, + {{STRING("true"), STRING("$[0].k1")}, Null()}, + {{STRING("false"), STRING("$[0].k1")}, Null()}, + {{STRING("100"), STRING("$[0].k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string + {{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, STRING(R"(v31)")}, // object + {{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array 
+ {{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")}, + STRING("v41")}, // complex array + }; + + check_function<DataTypeString, true>(func_name, input_types, data_set); +} + +TEST(FunctionJsonbTEST, GetJsonIntTest) { + std::string func_name = "get_json_int"; + InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String}; + + // get json from root + DataSet data_set = { + {{Null(), STRING("$")}, Null()}, + {{STRING("null"), STRING("$")}, Null()}, + {{STRING("true"), STRING("$")}, Null()}, + {{STRING("false"), STRING("$")}, Null()}, + {{STRING("100"), STRING("$")}, INT(100)}, //int8 + {{STRING("10000"), STRING("$")}, INT(10000)}, // int16 + {{STRING("1000000000"), STRING("$")}, INT(1000000000)}, // int32 + {{STRING("1152921504606846976"), STRING("$")}, Null()}, // int64 + {{STRING("6.18"), STRING("$")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$")}, Null()}, // string + {{STRING("{}"), STRING("$")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")}, Null()}, // object + {{STRING("[]"), STRING("$")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$")}, + Null()}, // complex array + }; + + check_function<DataTypeInt32, true>(func_name, input_types, data_set); + + // get json from obejct + data_set = { + {{Null(), STRING("$.k1")}, Null()}, + {{STRING("null"), STRING("$.k1")}, Null()}, + {{STRING("true"), STRING("$.k1")}, Null()}, + {{STRING("false"), STRING("$.k1")}, Null()}, + 
{{STRING("100"), STRING("$.k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$.k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$.k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string + {{STRING("{}"), STRING("$.k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, Null()}, // object + {{STRING("[]"), STRING("$.k1")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$.k1")}, + Null()}, // complex array + }; + + check_function<DataTypeInt32, true>(func_name, input_types, data_set); + + // get json from array + data_set = { + {{Null(), STRING("$[0]")}, Null()}, + {{STRING("null"), STRING("$[0]")}, Null()}, + {{STRING("true"), STRING("$[0]")}, Null()}, + {{STRING("false"), STRING("$[0]")}, Null()}, + {{STRING("100"), STRING("$[0]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0]")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string + {{STRING("{}"), STRING("$[0]")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")}, Null()}, // object + {{STRING("[]"), STRING("$[0]")}, Null()}, // empty array + {{STRING("null"), STRING("$[1]")}, Null()}, + {{STRING("true"), STRING("$[1]")}, Null()}, + {{STRING("false"), STRING("$[1]")}, Null()}, + {{STRING("100"), STRING("$[1]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[1]")}, Null()}, // int16 + 
{{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[1]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string + {{STRING("{}"), STRING("$[1]")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object + {{STRING("[]"), STRING("$[1]")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$[0]")}, INT(123)}, // int array + {{STRING("[123, 456]"), STRING("$[1]")}, INT(456)}, // int array + {{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0]")}, Null()}, // string array + {{STRING(R"(["abc", "def"])"), STRING("$[1]")}, Null()}, // string array + {{STRING(R"(["abc", "def"])"), STRING("$[2]")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[2]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")}, + INT(100)}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")}, + INT(1)}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")}, + Null()}, // 
complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")}, + Null()}, // complex array + }; + + check_function<DataTypeInt32, true>(func_name, input_types, data_set); + + // get json with path $[0].k1 + data_set = { + {{Null(), STRING("$[0].k1")}, Null()}, + {{STRING("null"), STRING("$[0].k1")}, Null()}, + {{STRING("true"), STRING("$[0].k1")}, Null()}, + {{STRING("false"), STRING("$[0].k1")}, Null()}, + {{STRING("100"), STRING("$[0].k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string + {{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, Null()}, // object + {{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k2")}, + INT(400)}, // complex array + }; + + check_function<DataTypeInt32, true>(func_name, input_types, data_set); +} + +TEST(FunctionJsonbTEST, GetJsonBigIntTest) { + std::string func_name = "get_json_bigint"; + InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String}; + + // get json from root + DataSet data_set = { + {{Null(), STRING("$")}, Null()}, + {{STRING("null"), STRING("$")}, Null()}, + {{STRING("true"), STRING("$")}, Null()}, + {{STRING("false"), STRING("$")}, Null()}, + {{STRING("100"), STRING("$")}, BIGINT(100)}, //int8 + 
{{STRING("10000"), STRING("$")}, BIGINT(10000)}, // int16 + {{STRING("1000000000"), STRING("$")}, BIGINT(1000000000)}, // int32 + {{STRING("1152921504606846976"), STRING("$")}, BIGINT(1152921504606846976)}, // int64 + {{STRING("6.18"), STRING("$")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$")}, Null()}, // string + {{STRING("{}"), STRING("$")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")}, Null()}, // object + {{STRING("[]"), STRING("$")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$")}, + Null()}, // complex array + }; + + check_function<DataTypeInt64, true>(func_name, input_types, data_set); + + // get json from obejct + data_set = { + {{Null(), STRING("$.k1")}, Null()}, + {{STRING("null"), STRING("$.k1")}, Null()}, + {{STRING("true"), STRING("$.k1")}, Null()}, + {{STRING("false"), STRING("$.k1")}, Null()}, + {{STRING("100"), STRING("$.k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$.k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$.k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string + {{STRING("{}"), STRING("$.k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, Null()}, // object + {{STRING("[]"), STRING("$.k1")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 
3.14])"), STRING("$.k1")}, + Null()}, // complex array + }; + + check_function<DataTypeInt64, true>(func_name, input_types, data_set); + + // get json from array + data_set = { + {{Null(), STRING("$[0]")}, Null()}, + {{STRING("null"), STRING("$[0]")}, Null()}, + {{STRING("true"), STRING("$[0]")}, Null()}, + {{STRING("false"), STRING("$[0]")}, Null()}, + {{STRING("100"), STRING("$[0]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0]")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string + {{STRING("{}"), STRING("$[0]")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")}, Null()}, // object + {{STRING("[]"), STRING("$[0]")}, Null()}, // empty array + {{STRING("null"), STRING("$[1]")}, Null()}, + {{STRING("true"), STRING("$[1]")}, Null()}, + {{STRING("false"), STRING("$[1]")}, Null()}, + {{STRING("100"), STRING("$[1]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[1]")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[1]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string + {{STRING("{}"), STRING("$[1]")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object + {{STRING("[]"), STRING("$[1]")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$[0]")}, BIGINT(123)}, // int array + {{STRING("[123, 456]"), STRING("$[1]")}, BIGINT(456)}, // int array + {{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0]")}, Null()}, // string array + {{STRING(R"(["abc", "def"])"), STRING("$[1]")}, Null()}, // string array + {{STRING(R"(["abc", "def"])"), 
STRING("$[2]")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[2]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")}, + BIGINT(100)}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")}, + BIGINT(1)}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")}, + Null()}, // complex array + }; + + check_function<DataTypeInt64, true>(func_name, input_types, data_set); + + // get json with path $[0].k1 + data_set = { + {{Null(), STRING("$[0].k1")}, Null()}, + {{STRING("null"), STRING("$[0].k1")}, Null()}, + {{STRING("true"), STRING("$[0].k1")}, Null()}, + {{STRING("false"), STRING("$[0].k1")}, Null()}, + {{STRING("100"), STRING("$[0].k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string + 
{{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, Null()}, // object + {{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k2")}, + BIGINT(400)}, // complex array + }; + + check_function<DataTypeInt64, true>(func_name, input_types, data_set); +} + +TEST(FunctionJsonbTEST, GetJsonDoubleTest) { + std::string func_name = "get_json_double"; + InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String}; + + // get json from root + DataSet data_set = { + {{Null(), STRING("$")}, Null()}, + {{STRING("null"), STRING("$")}, Null()}, + {{STRING("true"), STRING("$")}, Null()}, + {{STRING("false"), STRING("$")}, Null()}, + {{STRING("100"), STRING("$")}, DOUBLE(100)}, //int8 + {{STRING("10000"), STRING("$")}, DOUBLE(10000)}, // int16 + {{STRING("1000000000"), STRING("$")}, DOUBLE(1000000000)}, // int32 + {{STRING("1152921504606846976"), STRING("$")}, DOUBLE(1152921504606846976)}, // int64 + {{STRING("6.18"), STRING("$")}, DOUBLE(6.18)}, // double + {{STRING(R"("abcd")"), STRING("$")}, Null()}, // string + {{STRING("{}"), STRING("$")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")}, Null()}, // object + {{STRING("[]"), STRING("$")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 
3.14])"), STRING("$")}, + Null()}, // complex array + }; + + check_function<DataTypeFloat64, true>(func_name, input_types, data_set); + + // get json from object + data_set = { + {{STRING("null"), STRING("$.k1")}, Null()}, + {{STRING("true"), STRING("$.k1")}, Null()}, + {{STRING("false"), STRING("$.k1")}, Null()}, + {{STRING("100"), STRING("$.k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$.k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$.k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string + {{STRING("{}"), STRING("$.k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, Null()}, // object + {{STRING("[]"), STRING("$.k1")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$.k1")}, + Null()}, // complex array + }; + + check_function<DataTypeFloat64, true>(func_name, input_types, data_set); + + // get json from array + data_set = { + {{STRING("null"), STRING("$[0]")}, Null()}, + {{STRING("true"), STRING("$[0]")}, Null()}, + {{STRING("false"), STRING("$[0]")}, Null()}, + {{STRING("100"), STRING("$[0]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0]")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string + {{STRING("{}"), STRING("$[0]")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")}, Null()}, // object + {{STRING("[]"), 
STRING("$[0]")}, Null()}, // empty array + {{STRING("null"), STRING("$[1]")}, Null()}, + {{STRING("true"), STRING("$[1]")}, Null()}, + {{STRING("false"), STRING("$[1]")}, Null()}, + {{STRING("100"), STRING("$[1]")}, Null()}, //int8 + {{STRING("10000"), STRING("$[1]")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[1]")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string + {{STRING("{}"), STRING("$[1]")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object + {{STRING("[]"), STRING("$[1]")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$[0]")}, DOUBLE(123)}, // int array + {{STRING("[123, 456]"), STRING("$[1]")}, DOUBLE(456)}, // int array + {{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0]")}, Null()}, // string array + {{STRING(R"(["abc", "def"])"), STRING("$[1]")}, Null()}, // string array + {{STRING(R"(["abc", "def"])"), STRING("$[2]")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[2]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")}, + DOUBLE(100)}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")}, + DOUBLE(6.18)}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")}, + Null()}, // multi type array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")}, + Null()}, // complex 
array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")}, + DOUBLE(1)}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")}, + DOUBLE(3.14)}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")}, + Null()}, // complex array + }; + + check_function<DataTypeFloat64, true>(func_name, input_types, data_set); + + // get json with path $[0].k1 + data_set = { + {{STRING("null"), STRING("$[0].k1")}, Null()}, + {{STRING("true"), STRING("$[0].k1")}, Null()}, + {{STRING("false"), STRING("$[0].k1")}, Null()}, + {{STRING("100"), STRING("$[0].k1")}, Null()}, //int8 + {{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16 + {{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32 + {{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64 + {{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double + {{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string + {{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, Null()}, // object + {{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array + {{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array + {{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")}, + Null()}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")}, + Null()}, // complex array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k2")}, + DOUBLE(400)}, // complex array + }; + + check_function<DataTypeFloat64, true>(func_name, input_types, data_set); +} } // namespace doris::vectorized diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 
0223c54524f..8cbb924c358 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1781,6 +1781,14 @@ visible_functions = { [['get_json_string'], 'STRING', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'], [['get_json_bigint'], 'BIGINT', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'], [['get_json_bigint'], 'BIGINT', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'], + [['get_json_string'], 'STRING', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'], + [['get_json_string'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'], + [['get_json_int'], 'INT', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'], + [['get_json_int'], 'INT', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'], + [['get_json_double'], 'DOUBLE', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'], + [['get_json_double'], 'DOUBLE', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'], + [['get_json_bigint'], 'BIGINT', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'], + [['get_json_bigint'], 'BIGINT', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'], [['json_array'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'], [['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'], --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org