This is an automated email from the ASF dual-hosted git repository. lide pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 97135a1cbb [Feature] (json)add json_contains function (#20824) 97135a1cbb is described below commit 97135a1cbb2d3483479964515e10a0fde8fc551c Author: yuxuan-luo <119841515+yuxuan-...@users.noreply.github.com> AuthorDate: Fri Jun 16 15:10:12 2023 +0800 [Feature] (json)add json_contains function (#20824) --- be/src/vec/functions/function_json.cpp | 127 +++++++++++++++++++++ .../sql-functions/json-functions/json_contains.md | 69 +++++++++++ .../sql-functions/json-functions/json_contains.md | 69 +++++++++++ gensrc/script/doris_builtins_functions.py | 1 + .../json_functions/test_json_function.out | 21 ++++ .../json_functions/test_json_function.groovy | 9 ++ 6 files changed, 296 insertions(+) diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index e4f955ae0a..4d2f0f3614 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -18,6 +18,7 @@ #include <rapidjson/allocators.h> #include <rapidjson/document.h> #include <rapidjson/encodings.h> +#include <rapidjson/pointer.h> #include <rapidjson/rapidjson.h> #include <rapidjson/stringbuffer.h> #include <rapidjson/writer.h> @@ -935,6 +936,131 @@ public: } }; +class FunctionJsonContains : public IFunction { +public: + static constexpr auto name = "json_contains"; + static FunctionPtr create() { return std::make_shared<FunctionJsonContains>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 3; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeInt32>()); + } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool json_contains_object(const rapidjson::Value& target, + const rapidjson::Value& search_value) { + if (!target.IsObject() || !search_value.IsObject()) { + return false; + } + + for (auto itr = search_value.MemberBegin(); itr != search_value.MemberEnd(); ++itr) { + if (!target.HasMember(itr->name) || !json_contains(target[itr->name], itr->value)) { + return false; + } + } + + return true; + } + + bool json_contains_array(const rapidjson::Value& target, const rapidjson::Value& search_value) { + if (!target.IsArray() || !search_value.IsArray()) { + return false; + } + + for (auto itr = search_value.Begin(); itr != search_value.End(); ++itr) { + bool found = false; + for (auto target_itr = target.Begin(); target_itr != target.End(); ++target_itr) { + if (json_contains(*target_itr, *itr)) { + found = true; + break; + } + } + if (!found) { + return false; + } + } + + return true; + } + + bool json_contains(const rapidjson::Value& target, const rapidjson::Value& search_value) { + if (target == search_value) { + return true; + } + + if (target.IsObject() && search_value.IsObject()) { + return json_contains_object(target, search_value); + } + + if (target.IsArray() && search_value.IsArray()) { + return json_contains_array(target, search_value); + } + + return false; + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + const IColumn& col_json = *(block.get_by_position(arguments[0]).column); + const IColumn& col_search = *(block.get_by_position(arguments[1]).column); + const IColumn& col_path = *(block.get_by_position(arguments[2]).column); + + auto null_map = ColumnUInt8::create(input_rows_count, 0); + + const ColumnString* col_json_string = check_and_get_column<ColumnString>(col_json); + const ColumnString* col_search_string = check_and_get_column<ColumnString>(col_search); + const ColumnString* col_path_string = check_and_get_column<ColumnString>(col_path); + + if (!col_json_string || !col_search_string || !col_path_string) { + return Status::RuntimeError("Illegal column should be ColumnString"); + } + + auto col_to = ColumnVector<vectorized::Int32>::create(); + auto& vec_to = col_to->get_data(); + size_t size = col_json.size(); + vec_to.resize(size); + + for (size_t i = 0; i < input_rows_count; ++i) { + if (col_json.is_null_at(i) || col_search.is_null_at(i) || col_path.is_null_at(i)) { + null_map->get_data()[i] = 1; + vec_to[i] = 0; + continue; + } + + const auto& json_val = col_json_string->get_data_at(i); + const auto& search_val = col_search_string->get_data_at(i); + const auto& path_val = col_path_string->get_data_at(i); + + std::string_view json_string(json_val.data, json_val.size); + std::string_view search_string(search_val.data, search_val.size); + std::string_view path_string(path_val.data, path_val.size); + + rapidjson::Document document; + auto target_val = get_json_object<JSON_FUN_STRING>(json_string, path_string, &document); + if (target_val == nullptr || target_val->IsNull()) { + vec_to[i] = 0; + } else { + rapidjson::Document search_doc; + search_doc.Parse(search_string.data(), search_string.size()); + if (json_contains(*target_val, search_doc)) { + vec_to[i] = 1; + } else { + vec_to[i] = 0; + } + } + } + + block.replace_by_position(result, + ColumnNullable::create(std::move(col_to), std::move(null_map))); + + return Status::OK(); + } +}; + class FunctionJsonUnquote : public IFunction { public: static constexpr auto name = "json_unquote"; @@ -1015,6 +1141,7 @@ void register_function_json(SimpleFunctionFactory& factory) { factory.register_function<FunctionJson<FunctionJsonExtractImpl>>(); factory.register_function<FunctionJsonValid>(); + factory.register_function<FunctionJsonContains>(); } } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md b/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md new file mode 100644 index 0000000000..6f8a8f3da5 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/json-functions/json_contains.md @@ -0,0 +1,69 @@ +--- +{ +"title": "json_contains", +"language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## json_contains +### description +#### Syntax + +`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)` + + +Indicates by returning 1 or 0 whether a given candidate JSON document is contained at a specific path within the json_str JSON document + +### example + +``` +mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}'; +mysql> SET @j2 = '1'; +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.a') | ++-------------------------------+ +| 1 | ++-------------------------------+ +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.b') | ++-------------------------------+ +| 0 | ++-------------------------------+ + +mysql> SET @j2 = '{"d": 4}'; +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.a') | ++-------------------------------+ +| 0 | ++-------------------------------+ +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.c') | ++-------------------------------+ +| 1 | ++-------------------------------+ +``` +### keywords +json,json_contains diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md new file mode 100644 index 0000000000..fa29486648 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_contains.md @@ -0,0 +1,69 @@ +--- +{ +"title": "json_contains", +"language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## json_contains +### description +#### Syntax + +`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)` + + +通过返回 1 或 0 来指示给定的 candidate JSON 文档是否包含在 json_str JSON json_path 路径下的文档中 + +### example + +``` +mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}'; +mysql> SET @j2 = '1'; +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.a') | ++-------------------------------+ +| 1 | ++-------------------------------+ +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.b') | ++-------------------------------+ +| 0 | ++-------------------------------+ + +mysql> SET @j2 = '{"d": 4}'; +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.a') | ++-------------------------------+ +| 0 | ++-------------------------------+ +mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c'); ++-------------------------------+ +| JSON_CONTAINS(@j, @j2, '$.c') | ++-------------------------------+ +| 1 | ++-------------------------------+ +``` +### keywords +json,json_contains diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 6b9b9458ae..c7e36eb76e 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1753,6 +1753,7 @@ visible_functions = { [['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'], [['json_quote'], 'VARCHAR', ['VARCHAR'], ''], [['json_valid'], 'INT', ['VARCHAR'], 'ALWAYS_NULLABLE'], + [['json_contains'], 'INT', ['VARCHAR', 'VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'], [['json_unquote'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NULLABLE'], [['json_extract'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'], ''] ], diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out index 3fae061dcc..e776281b5d 100644 --- a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out +++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out @@ -146,3 +146,24 @@ doris -- !sql -- 123 +-- !sql -- +1 + +-- !sql -- +0 + +-- !sql -- +0 + +-- !sql -- +1 + +-- !sql -- +0 + +-- !sql -- +1 + +-- !sql -- +1 + diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy index c0a9fe6b8b..2aa4056198 100644 --- a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy @@ -74,4 +74,13 @@ suite("test_json_function") { qt_sql "SELECT '{\"k1\": \"v1\", \"k2\": { \"k21\": 6.6, \"k22\": [1, 2, 3] } }'->'\$.k2'->'\$.k22'" qt_sql "SELECT json_unquote('{\"id\": 123, \"name\": \"doris\"}'->'\$.name');" qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', '\$.name')->'\$.[0]';" + + qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.a');" + qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.b');" + qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.a');" + qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.c');" + qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"city\": \"New York\", \"hobbies\": [\"reading\", \"travelling\"]}', '{\"age\": 31, \"hobbies\": [\"reading\"]}', '\$.');" + qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"projects\": [{\"name\": \"Project A\", \"year\": 2020}, {\"name\": \"Project B\", \"year\": 2021}]}', '{\"projects\": [{\"name\": \"Project A\"}]}', '\$.');" + qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"address\": {\"city\": \"New York\", \"country\": \"USA\"}}', '{\"address\": {\"city\": \"New York\"}}', '\$.');" + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org