xiaokang commented on code in PR #24938:
URL: https://github.com/apache/doris/pull/24938#discussion_r1390326385
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -1353,6 +1535,8 @@ void register_function_jsonb(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>();
factory.register_function<FunctionJsonbContains<JsonbContainsImpl>>();
factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>();
+
factory.register_function<FunctionJsonbContainsPath<JsonbContainsPathWithOnePathImpl>>();
+
factory.register_function<FunctionJsonbContainsPath<JsonbContainsPathWithTwoPathImpl>>();
Review Comment:
`json_contains_path` should support not just two paths, but a variable number (n) of path arguments.
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -1353,6 +1535,8 @@ void register_function_jsonb(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>();
factory.register_function<FunctionJsonbContains<JsonbContainsImpl>>();
factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>();
+
factory.register_function<FunctionJsonbContainsPath<JsonbContainsPathWithOnePathImpl>>();
+
factory.register_function<FunctionJsonbContainsPath<JsonbContainsPathWithTwoPathImpl>>();
Review Comment:
MySQL signature: `JSON_CONTAINS_PATH(json_doc, one_or_all, path[, path] ...)`
MySQL ref:
https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html#function_json-contains-path
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -1288,6 +1288,188 @@ struct JsonbContainsAndPathImpl {
}
};
+template <typename Impl>
+class FunctionJsonbContainsPath : public IFunction {
+public:
+ static constexpr auto name = "json_contains_path";
+ String get_name() const override { return name; }
+ static FunctionPtr create() { return
std::make_shared<FunctionJsonbContainsPath<Impl>>(); }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeUInt8>());
+ }
+ DataTypes get_variadic_argument_types_impl() const override {
+ return Impl::get_variadic_argument_types();
+ }
+ size_t get_number_of_arguments() const override {
+ return get_variadic_argument_types_impl().size();
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) const override
{
+ return Impl::execute_impl(context, block, arguments, result,
input_rows_count);
+ }
+};
+
+struct JsonbContainsPathUtil {
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, size_t result,
+ size_t input_rows_count) {
+ DCHECK_GE(arguments.size(), 4); // At least 4 arguments are mandatory
+
+ auto jsonb_data_column = block.get_by_position(arguments[0]).column;
+ auto one_or_all_column = block.get_by_position(arguments[1]).column;
+
+ ColumnPtr path1_column;
+ bool is_const1 = false;
+ std::tie(path1_column, is_const1) =
+ unpack_if_const(block.get_by_position(arguments[2]).column);
+
+ JsonbPath path1;
+ if (is_const1) {
+ auto path1_value = path1_column->get_data_at(0);
+ if (!path1.seek(path1_value.data, path1_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path1_value.data),
+ path1_value.size));
+ }
+ }
+
+ ColumnPtr path2_column;
+ bool is_const2 = false;
+ std::tie(path2_column, is_const2) =
+ unpack_if_const(block.get_by_position(arguments[3]).column);
+
+ JsonbPath path2;
+ if (is_const2) {
+ auto path2_value = path2_column->get_data_at(0);
+ if (!path2.seek(path2_value.data, path2_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path2_value.data),
+ path2_value.size));
+ }
+ }
+
+ auto null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto return_type = block.get_data_type(result);
+ MutableColumnPtr res = return_type->create_column();
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (jsonb_data_column->is_null_at(i) ||
one_or_all_column->is_null_at(i) ||
+ path1_column->is_null_at(i) || path2_column->is_null_at(i)) {
+ null_map->get_data()[i] = 1;
+ res->insert_data(nullptr, 0);
+ continue;
+ }
+
+ if (!is_const1) {
+ auto path1_value = path1_column->get_data_at(i);
+ path1.clean();
+ if (!path1.seek(path1_value.data, path1_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path1_value.data),
+ path1_value.size));
+ }
+ }
+
+ if (!is_const2) {
+ auto path2_value = path2_column->get_data_at(i);
+ path2.clean();
+ if (!path2.seek(path2_value.data, path2_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path2_value.data),
+ path2_value.size));
+ }
+ }
+
+ auto jsonb_value = jsonb_data_column->get_data_at(i);
+ auto one_or_all_value =
to_lower(one_or_all_column->get_data_at(i).to_string());
+
+ JsonbDocument* doc =
JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size);
+
+ if (one_or_all_value != "one" && one_or_all_value != "all") {
+ return Status::InvalidArgument(
+ "The oneOrAll argument to json_contains_path may take
these values: 'one' "
+ "or 'all'.");
+ }
+
+ bool is_one = one_or_all_value == "one";
+
+ bool result_value = false;
+
+ // Assuming JsonbValue has a method like findValue to find a value
by path.
+ JsonbValue* value1 = doc->getValue()->findValue(path1, nullptr);
+ JsonbValue* value2 = doc->getValue()->findValue(path2, nullptr);
+
+ if (is_one) {
+ result_value =
+ value1 ||
+ value2; // In 'one' mode, if either path exists, set
result_value to true.
+ } else { // isAll
+ result_value =
+ value1 &&
+ value2; // In 'all' mode, only if both paths exist,
set result_value to true.
+ }
+
+ res->insert_data(reinterpret_cast<const char*>(&result_value), 0);
+ }
+
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
+ }
+};
+
+struct JsonbContainsPathWithOnePathImpl {
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<DataTypeJsonb>(),
std::make_shared<DataTypeString>(),
+ std::make_shared<DataTypeString>()};
+ }
+
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, size_t result,
+ size_t input_rows_count) {
+ auto path = ColumnString::create();
+ std::string root_path = "$";
+
+ for (int i = 0; i < input_rows_count; i++) {
+ reinterpret_cast<ColumnString*>(path.get())
+ ->insert_data(root_path.data(), root_path.size());
+ }
+
+ block.insert({std::move(path), std::make_shared<DataTypeString>(),
"path"});
Review Comment:
What's the purpose of path '$'?
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -1288,6 +1288,188 @@ struct JsonbContainsAndPathImpl {
}
};
+template <typename Impl>
+class FunctionJsonbContainsPath : public IFunction {
+public:
+ static constexpr auto name = "json_contains_path";
+ String get_name() const override { return name; }
+ static FunctionPtr create() { return
std::make_shared<FunctionJsonbContainsPath<Impl>>(); }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeUInt8>());
+ }
+ DataTypes get_variadic_argument_types_impl() const override {
+ return Impl::get_variadic_argument_types();
+ }
+ size_t get_number_of_arguments() const override {
+ return get_variadic_argument_types_impl().size();
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) const override
{
+ return Impl::execute_impl(context, block, arguments, result,
input_rows_count);
+ }
+};
+
+struct JsonbContainsPathUtil {
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, size_t result,
+ size_t input_rows_count) {
+ DCHECK_GE(arguments.size(), 4); // At least 4 arguments are mandatory
+
+ auto jsonb_data_column = block.get_by_position(arguments[0]).column;
+ auto one_or_all_column = block.get_by_position(arguments[1]).column;
+
+ ColumnPtr path1_column;
+ bool is_const1 = false;
+ std::tie(path1_column, is_const1) =
+ unpack_if_const(block.get_by_position(arguments[2]).column);
+
+ JsonbPath path1;
+ if (is_const1) {
+ auto path1_value = path1_column->get_data_at(0);
+ if (!path1.seek(path1_value.data, path1_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path1_value.data),
+ path1_value.size));
+ }
+ }
+
+ ColumnPtr path2_column;
+ bool is_const2 = false;
+ std::tie(path2_column, is_const2) =
+ unpack_if_const(block.get_by_position(arguments[3]).column);
+
+ JsonbPath path2;
+ if (is_const2) {
+ auto path2_value = path2_column->get_data_at(0);
+ if (!path2.seek(path2_value.data, path2_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path2_value.data),
+ path2_value.size));
+ }
+ }
+
+ auto null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto return_type = block.get_data_type(result);
+ MutableColumnPtr res = return_type->create_column();
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (jsonb_data_column->is_null_at(i) ||
one_or_all_column->is_null_at(i) ||
+ path1_column->is_null_at(i) || path2_column->is_null_at(i)) {
+ null_map->get_data()[i] = 1;
+ res->insert_data(nullptr, 0);
+ continue;
+ }
+
+ if (!is_const1) {
+ auto path1_value = path1_column->get_data_at(i);
+ path1.clean();
+ if (!path1.seek(path1_value.data, path1_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path1_value.data),
+ path1_value.size));
+ }
+ }
+
+ if (!is_const2) {
+ auto path2_value = path2_column->get_data_at(i);
+ path2.clean();
+ if (!path2.seek(path2_value.data, path2_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path2_value.data),
+ path2_value.size));
+ }
+ }
+
+ auto jsonb_value = jsonb_data_column->get_data_at(i);
+ auto one_or_all_value =
to_lower(one_or_all_column->get_data_at(i).to_string());
+
+ JsonbDocument* doc =
JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size);
+
+ if (one_or_all_value != "one" && one_or_all_value != "all") {
+ return Status::InvalidArgument(
+ "The oneOrAll argument to json_contains_path may take
these values: 'one' "
+ "or 'all'.");
+ }
+
+ bool is_one = one_or_all_value == "one";
+
+ bool result_value = false;
+
+ // Assuming JsonbValue has a method like findValue to find a value
by path.
+ JsonbValue* value1 = doc->getValue()->findValue(path1, nullptr);
+ JsonbValue* value2 = doc->getValue()->findValue(path2, nullptr);
+
+ if (is_one) {
+ result_value =
+ value1 ||
+ value2; // In 'one' mode, if either path exists, set
result_value to true.
+ } else { // isAll
+ result_value =
+ value1 &&
+ value2; // In 'all' mode, only if both paths exist,
set result_value to true.
+ }
+
+ res->insert_data(reinterpret_cast<const char*>(&result_value), 0);
+ }
+
+ block.replace_by_position(result,
+ ColumnNullable::create(std::move(res),
std::move(null_map)));
+ return Status::OK();
+ }
+};
+
+struct JsonbContainsPathWithOnePathImpl {
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<DataTypeJsonb>(),
std::make_shared<DataTypeString>(),
+ std::make_shared<DataTypeString>()};
+ }
+
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, size_t result,
+ size_t input_rows_count) {
+ auto path = ColumnString::create();
+ std::string root_path = "$";
+
+ for (int i = 0; i < input_rows_count; i++) {
+ reinterpret_cast<ColumnString*>(path.get())
+ ->insert_data(root_path.data(), root_path.size());
+ }
+
+ block.insert({std::move(path), std::make_shared<DataTypeString>(),
"path"});
Review Comment:
Where is the path column used?
##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -1288,6 +1288,188 @@ struct JsonbContainsAndPathImpl {
}
};
+template <typename Impl>
+class FunctionJsonbContainsPath : public IFunction {
+public:
+ static constexpr auto name = "json_contains_path";
+ String get_name() const override { return name; }
+ static FunctionPtr create() { return
std::make_shared<FunctionJsonbContainsPath<Impl>>(); }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeUInt8>());
+ }
+ DataTypes get_variadic_argument_types_impl() const override {
+ return Impl::get_variadic_argument_types();
+ }
+ size_t get_number_of_arguments() const override {
+ return get_variadic_argument_types_impl().size();
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) const override
{
+ return Impl::execute_impl(context, block, arguments, result,
input_rows_count);
+ }
+};
+
+struct JsonbContainsPathUtil {
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, size_t result,
+ size_t input_rows_count) {
+ DCHECK_GE(arguments.size(), 4); // At least 4 arguments are mandatory
+
+ auto jsonb_data_column = block.get_by_position(arguments[0]).column;
+ auto one_or_all_column = block.get_by_position(arguments[1]).column;
+
+ ColumnPtr path1_column;
+ bool is_const1 = false;
+ std::tie(path1_column, is_const1) =
+ unpack_if_const(block.get_by_position(arguments[2]).column);
+
+ JsonbPath path1;
+ if (is_const1) {
+ auto path1_value = path1_column->get_data_at(0);
+ if (!path1.seek(path1_value.data, path1_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path1_value.data),
+ path1_value.size));
+ }
+ }
+
+ ColumnPtr path2_column;
+ bool is_const2 = false;
+ std::tie(path2_column, is_const2) =
+ unpack_if_const(block.get_by_position(arguments[3]).column);
+
+ JsonbPath path2;
+ if (is_const2) {
+ auto path2_value = path2_column->get_data_at(0);
+ if (!path2.seek(path2_value.data, path2_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path2_value.data),
+ path2_value.size));
+ }
+ }
+
+ auto null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto return_type = block.get_data_type(result);
+ MutableColumnPtr res = return_type->create_column();
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (jsonb_data_column->is_null_at(i) ||
one_or_all_column->is_null_at(i) ||
+ path1_column->is_null_at(i) || path2_column->is_null_at(i)) {
+ null_map->get_data()[i] = 1;
+ res->insert_data(nullptr, 0);
+ continue;
+ }
+
+ if (!is_const1) {
+ auto path1_value = path1_column->get_data_at(i);
+ path1.clean();
+ if (!path1.seek(path1_value.data, path1_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path1_value.data),
+ path1_value.size));
+ }
+ }
+
+ if (!is_const2) {
+ auto path2_value = path2_column->get_data_at(i);
+ path2.clean();
+ if (!path2.seek(path2_value.data, path2_value.size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(path2_value.data),
+ path2_value.size));
+ }
+ }
+
+ auto jsonb_value = jsonb_data_column->get_data_at(i);
+ auto one_or_all_value =
to_lower(one_or_all_column->get_data_at(i).to_string());
Review Comment:
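Please apply the const-column optimization to `one_or_all` as well (as is already done for the path arguments via `unpack_if_const`).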
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]