github-actions[bot] commented on code in PR #40962:
URL: https://github.com/apache/doris/pull/40962#discussion_r1766242394


##########
be/src/vec/functions/function_jsonb.cpp:
##########
@@ -1426,6 +1428,355 @@ struct JsonbContainsAndPathImpl {
     }
 };
 
+class FunctionJsonSearch : public IFunction {
+private:
+    using OneFun = std::function<Status(size_t, bool*)>;
+    // OneFun-shaped callback: ignores the row index, stores `true` in *res and
+    // reports success.
+    // NOTE(review): presumably selected when the one/all argument is the constant
+    // "one" -- confirm at the call site, which is not visible here.
+    static Status always_one(size_t /*i*/, bool* res) {
+        *res = true;
+        return Status::OK();
+    }
+    // OneFun-shaped callback: ignores the row index, stores `false` in *res
+    // (i.e. "not one-match mode") and reports success.
+    // NOTE(review): presumably selected when the one/all argument is the constant
+    // "all" -- confirm at the call site, which is not visible here.
+    static Status always_all(size_t /*i*/, bool* res) {
+        *res = false;
+        return Status::OK();
+    }
+
+    using CheckNullFun = std::function<bool(size_t)>;
+    // CheckNullFun-shaped callback: answers "not null" for every row index.
+    static bool always_not_null(size_t /*row*/) {
+        return false;
+    }
+    // CheckNullFun-shaped callback: answers "null" for every row index.
+    static bool always_null(size_t /*row*/) {
+        return true;
+    }
+
+    using GetJsonStringRefFun = std::function<StringRef(size_t)>;
+
+    // Invokes the scalar matcher stored in `state` on `str` and lets it write its
+    // verdict into *res. The pattern argument is a default-constructed StringRef
+    // that this call site treats as unused.
+    Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const {
+        StringRef candidate(str.data(), str.size());
+        StringRef unused_pattern; // not used
+        return (state->scalar_function)(&state->search_state, candidate, unused_pattern, res);
+    }
+
+    /**
+     * Recursively walks a parsed JSON element and records the path of every string
+     * value accepted by the matcher held in `state`.
+     *
+     * @param element   JSON element to inspect; strings are matched, objects and
+     *                  arrays are descended into, anything else is ignored
+     * @param one_match when true, stop descending as soon as one new path is recorded
+     * @param state     matcher state forwarded to matched()
+     * @param cur_path  path of `element`; a leg is pushed before and popped after
+     *                  each child visit, so it is unchanged on return
+     * @param matches   set receiving the string form of every matched path
+     * @return true if at least one path was newly inserted into `matches`
+     */
+    bool find_matches(const SimdJSONParser::Element& element, const bool& one_match,
+                      LikeState* state, JsonbPath* cur_path,
+                      std::unordered_set<std::string>* matches) const {
+        if (element.isString()) {
+            const std::string_view value = element.getString();
+            // Initialised so that a matcher which leaves the flag untouched reads
+            // as "no match" instead of an indeterminate value.
+            unsigned char is_match = 0;
+            // NOTE(review): RETURN_IF_ERROR hands a Status back from a function that
+            // returns bool -- confirm how a matcher error is meant to propagate.
+            RETURN_IF_ERROR(matched(value, state, &is_match));
+            if (!is_match) {
+                return false;
+            }
+            std::string path_str;
+            if (!cur_path->to_string(&path_str)) {
+                return false;
+            }
+            // insert().second is false when this exact path was already recorded.
+            return matches->insert(std::move(path_str)).second;
+        }
+
+        if (element.isObject()) {
+            const SimdJSONParser::Object& object = element.getObject();
+            bool found = false;
+            for (size_t i = 0; i < object.size(); ++i) {
+                const SimdJSONParser::KeyValuePair& item = object[i];
+                const std::string_view& key = item.first;
+                const SimdJSONParser::Element& child_element = item.second;
+                // construct an object member path leg.
+                auto leg = std::make_unique<leg_info>(const_cast<char*>(key.data()), key.size(), 0,
+                                                      MEMBER_CODE);
+                cur_path->add_leg_to_leg_vector(std::move(leg));
+                found |= find_matches(child_element, one_match, state, cur_path, matches);
+                cur_path->pop_leg_from_leg_vector();
+                if (one_match && found) {
+                    return true;
+                }
+            }
+            return found;
+        }
+
+        if (element.isArray()) {
+            const SimdJSONParser::Array& array = element.getArray();
+            bool found = false;
+            for (size_t i = 0; i < array.size(); ++i) {
+                // construct an array cell path leg.
+                auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE);
+                cur_path->add_leg_to_leg_vector(std::move(leg));
+                const SimdJSONParser::Element& child_element = array[i];
+                found |= find_matches(child_element, one_match, state, cur_path, matches);
+                cur_path->pop_leg_from_leg_vector();
+                if (one_match && found) {
+                    return true;
+                }
+            }
+            return found;
+        }
+
+        // Numbers, booleans and nulls can never match a string pattern.
+        return false;
+    }
+
+    // Serialises the matched paths through a JsonbWriter and appends the encoded
+    // bytes to `result_col`: a bare string when there is exactly one path,
+    // otherwise an array of strings.
+    void make_result_str(std::unordered_set<std::string>& matches,
+                         ColumnString* result_col) const {
+        JsonbWriter writer;
+        const bool wrap_in_array = matches.size() != 1;
+        if (wrap_in_array) {
+            writer.writeStartArray();
+        }
+        for (const auto& path : matches) {
+            writer.writeStartString();
+            writer.writeString(path);
+            writer.writeEndString();
+        }
+        if (wrap_in_array) {
+            writer.writeEndArray();
+        }
+
+        result_col->insert_data(writer.getOutput()->getBuffer(),
+                                (size_t)writer.getOutput()->getSize());
+    }
+
+    /**
+     * Evaluates json_search row by row and stores a nullable string column at
+     * position `result` of `block`.
+     *
+     * For each row: a NULL json document, a NULL one/all argument or a NULL search
+     * string yields NULL; an unparsable json document aborts with InvalidArgument;
+     * otherwise every string value of the document is matched and the paths of the
+     * matches are written (NULL when nothing matched).
+     *
+     * @tparam search_is_const  when true the matcher state is fetched once from the
+     *                          function context; when false it is rebuilt per row
+     *                          from `col_search_string`
+     * @param json_null_check   tells whether the json document of a row is NULL
+     * @param col_json_string   returns the json text of a row
+     * @param one_null_check    tells whether the one/all argument of a row is NULL
+     * @param one_check         resolves the one/all argument of a row into a flag
+     * @param search_null_check tells whether the search string of a row is NULL
+     * @param col_search_string search strings; only read when !search_is_const
+     * @param context           function context owning the thread-local state
+     * @param result            block position that receives the result column
+     */
+    template <bool search_is_const>
+    Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check,
+                          GetJsonStringRefFun col_json_string, CheckNullFun one_null_check,
+                          OneFun one_check, CheckNullFun search_null_check,
+                          const ColumnString* col_search_string, FunctionContext* context,
+                          size_t result) const {
+        auto result_col = ColumnString::create();
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+
+        std::shared_ptr<LikeState> state_ptr;
+        LikeState* state = nullptr;
+        // search_is_const is a template parameter, so resolve the branch at compile time.
+        if constexpr (search_is_const) {
+            state = reinterpret_cast<LikeState*>(
+                    context->get_function_state(FunctionContext::THREAD_LOCAL));
+        }
+
+        SimdJSONParser parser;
+        SimdJSONParser::Element root_element;
+        bool is_one = false;
+        // The search always starts from the document root.
+        const std::string root_path_str = "$";
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (json_null_check(i)) {
+                null_map->get_data()[i] = 1;
+                result_col->insert_data("", 0);
+                continue;
+            }
+            // an error occurs if the json_doc argument is not a valid json document.
+            const auto& json_doc = col_json_string(i);
+            if (!parser.parse({json_doc.data, json_doc.size}, root_element)) {
+                return Status::InvalidArgument(
+                        "the json_doc argument {} is not a valid json document", json_doc);
+            }
+
+            // The one/all argument is validated before the NULL short-circuit below,
+            // so an invalid value is reported even when the search string is NULL.
+            const bool one_is_null = one_null_check(i);
+            if (!one_is_null) {
+                RETURN_IF_ERROR(one_check(i, &is_one));
+            }
+
+            if (one_is_null || search_null_check(i)) {
+                null_map->get_data()[i] = 1;
+                result_col->insert_data("", 0);
+                continue;
+            }
+
+            JsonbPath root_path;
+            root_path.seek(root_path_str.c_str(), root_path_str.size());
+
+            if constexpr (!search_is_const) {
+                // The pattern varies per row: build a fresh matcher state for it.
+                state_ptr = std::make_shared<LikeState>();
+                state_ptr->is_like_pattern = true;
+                const auto& search_str = col_search_string->get_data_at(i);
+                RETURN_IF_ERROR(
+                        FunctionLike::construct_like_const_state(context, search_str, state_ptr));
+                state = state_ptr.get();
+            }
+
+            // maintain a hashset to deduplicate matches.
+            std::unordered_set<std::string> matches;
+            find_matches(root_element, is_one, state, &root_path, &matches);
+            if (matches.empty()) {
+                // returns NULL if the search_str is not found in the document.
+                null_map->get_data()[i] = 1;
+                result_col->insert_data("", 0);
+                continue;
+            }
+            make_result_str(matches, result_col.get());
+        }
+        auto result_col_nullable =
+                ColumnNullable::create(std::move(result_col), std::move(null_map));
+        block.replace_by_position(result, std::move(result_col_nullable));
+        return Status::OK();
+    }
+
+    static constexpr auto one = "one";
+    static constexpr auto all = "all";
+
+public:
+    static constexpr auto name = "json_search";
+    // Factory: returns a new shared FunctionJsonSearch instance.
+    static FunctionPtr create() {
+        return std::make_shared<FunctionJsonSearch>();
+    }
+
+    String get_name() const override { return name; }
+    bool is_variadic() const override { return false; }
+    size_t get_number_of_arguments() const override { return 3; }

Review Comment:
   warning: function 'get_number_of_arguments' should be marked [[nodiscard]] [modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] size_t get_number_of_arguments() const override { return 3; }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to