eldenmoon commented on code in PR #24384:
URL: https://github.com/apache/doris/pull/24384#discussion_r1327568996


##########
be/src/vec/functions/function_json.cpp:
##########
@@ -1131,6 +1131,319 @@ class FunctionJsonUnquote : public IFunction {
     }
 };
 
+struct FunctionJsonInsert {
+    static constexpr auto name = "json_insert";
+    static constexpr auto is_insert = true;

Review Comment:
   Why not use an enum? A boolean value is not intuitive.



##########
be/src/vec/functions/function_json.cpp:
##########
@@ -1131,6 +1131,319 @@ class FunctionJsonUnquote : public IFunction {
     }
 };
 
+struct FunctionJsonInsert {
+    static constexpr auto name = "json_insert";
+    static constexpr auto is_insert = true;
+    static constexpr auto is_replace = false;
+};
+
+struct FunctionJsonReplace {
+    static constexpr auto name = "json_replace";
+    static constexpr auto is_insert = false;
+    static constexpr auto is_replace = true;
+};
+struct FunctionJsonSet {
+    static constexpr auto name = "json_set";
+    static constexpr auto is_insert = true;
+    static constexpr auto is_replace = true;
+};
+
+template <typename Kind>
+class FunctionJsonModifyImpl : public IFunction {
+private:
+    // T = std::vector<std::string>
+    // TODO: update RE2 to support std::vector<std::string_view>
+    // if path is not a valid path expression or contains
+    // a * wildcard, return runtime error.
+    template <typename T>
+    Status get_parsed_paths_with_status(const T& path_exprs, 
std::vector<JsonPath>* parsed_paths) {
+        if (UNLIKELY(path_exprs.empty())) {
+            return Status::RuntimeError("json path empty function {}", 
get_name());
+        }
+
+        if (path_exprs[0] != "$") {
+            // keep same behaviour with get_parsed_paths(),
+            // '$[0]' is not invalid path, '$.[0]' is invalid
+            return Status::RuntimeError(
+                    "Invalid JSON path expression. The error is around 
character position 1");
+        }
+        parsed_paths->emplace_back("$", -1, true);
+
+        for (int i = 1; i < path_exprs.size(); i++) {
+            std::string col;
+            std::string index;
+            if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, 
&index))) {
+                return Status::RuntimeError(
+                        "Invalid JSON path expression. The error is around 
character position {}",
+                        i + 1);
+            } else {
+                int idx = -1;
+                if (!index.empty()) {
+                    if (index == "*") {
+                        return Status::RuntimeError(
+                                "In this situation, path expressions may not 
contain the * token");
+                    } else {
+                        idx = atoi(index.c_str());
+                    }
+                }
+                parsed_paths->emplace_back(col, idx, true);
+            }
+        }
+        return Status::OK();
+    }
+
+    Status 
get_parsed_path_columns(std::vector<std::vector<std::vector<JsonPath>>>& 
json_paths,
+                                   const std::vector<const ColumnString*>& 
data_columns,
+                                   size_t input_rows_count) {
+        for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) {
+            json_paths.emplace_back(std::vector<std::vector<JsonPath>>());
+            for (auto row = 0; row < input_rows_count; row++) {
+                const auto path = data_columns[col]->get_data_at(row);
+                std::string_view path_string(path.data, path.size);
+                std::vector<JsonPath> parsed_paths;
+
+#ifdef USE_LIBCPP
+                std::string s(path_string);
+                auto tok = get_json_token(s);
+#else
+                auto tok = get_json_token(path_string);
+#endif
+                std::vector<std::string> paths(tok.begin(), tok.end());
+                auto status = get_parsed_paths_with_status(paths, 
&parsed_paths);

Review Comment:
   Why not use the RETURN_IF_ERROR macro?



##########
be/src/vec/functions/function_json.cpp:
##########
@@ -1131,6 +1131,319 @@ class FunctionJsonUnquote : public IFunction {
     }
 };
 
+struct FunctionJsonInsert {
+    static constexpr auto name = "json_insert";
+    static constexpr auto is_insert = true;
+    static constexpr auto is_replace = false;
+};
+
+struct FunctionJsonReplace {
+    static constexpr auto name = "json_replace";
+    static constexpr auto is_insert = false;
+    static constexpr auto is_replace = true;
+};
+struct FunctionJsonSet {
+    static constexpr auto name = "json_set";
+    static constexpr auto is_insert = true;
+    static constexpr auto is_replace = true;
+};
+
+template <typename Kind>
+class FunctionJsonModifyImpl : public IFunction {
+private:
+    // T = std::vector<std::string>
+    // TODO: update RE2 to support std::vector<std::string_view>
+    // if path is not a valid path expression or contains
+    // a * wildcard, return runtime error.
+    template <typename T>
+    Status get_parsed_paths_with_status(const T& path_exprs, 
std::vector<JsonPath>* parsed_paths) {
+        if (UNLIKELY(path_exprs.empty())) {
+            return Status::RuntimeError("json path empty function {}", 
get_name());
+        }
+
+        if (path_exprs[0] != "$") {
+            // keep same behaviour with get_parsed_paths(),
+            // '$[0]' is not invalid path, '$.[0]' is invalid
+            return Status::RuntimeError(
+                    "Invalid JSON path expression. The error is around 
character position 1");
+        }
+        parsed_paths->emplace_back("$", -1, true);
+
+        for (int i = 1; i < path_exprs.size(); i++) {
+            std::string col;
+            std::string index;
+            if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, 
&index))) {
+                return Status::RuntimeError(
+                        "Invalid JSON path expression. The error is around 
character position {}",
+                        i + 1);
+            } else {
+                int idx = -1;
+                if (!index.empty()) {
+                    if (index == "*") {
+                        return Status::RuntimeError(
+                                "In this situation, path expressions may not 
contain the * token");
+                    } else {
+                        idx = atoi(index.c_str());
+                    }
+                }
+                parsed_paths->emplace_back(col, idx, true);
+            }
+        }
+        return Status::OK();
+    }
+
+    Status 
get_parsed_path_columns(std::vector<std::vector<std::vector<JsonPath>>>& 
json_paths,
+                                   const std::vector<const ColumnString*>& 
data_columns,
+                                   size_t input_rows_count) {
+        for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) {
+            json_paths.emplace_back(std::vector<std::vector<JsonPath>>());
+            for (auto row = 0; row < input_rows_count; row++) {
+                const auto path = data_columns[col]->get_data_at(row);
+                std::string_view path_string(path.data, path.size);
+                std::vector<JsonPath> parsed_paths;
+
+#ifdef USE_LIBCPP
+                std::string s(path_string);
+                auto tok = get_json_token(s);
+#else
+                auto tok = get_json_token(path_string);
+#endif
+                std::vector<std::string> paths(tok.begin(), tok.end());
+                auto status = get_parsed_paths_with_status(paths, 
&parsed_paths);
+                if (UNLIKELY(status != Status::OK())) {
+                    return status;
+                }
+                json_paths[col / 2].emplace_back(parsed_paths);
+            }
+        }
+        return Status::OK();
+    }
+
+public:
+    static constexpr auto name = Kind::name;
+
+    static FunctionPtr create() { return 
std::make_shared<FunctionJsonModifyImpl<Kind>>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    bool is_variadic() const override { return true; }
+
+    bool use_default_implementation_for_nulls() const override { return false; 
}
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        bool is_nullable = false;
+        for (auto col = 2; col < arguments.size(); col += 2) {
+            if (arguments[col]->is_nullable()) {
+                is_nullable = true;
+                break;
+            }
+        }
+        return is_nullable ? make_nullable(std::make_shared<DataTypeString>())
+                           : std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto result_column = ColumnString::create();
+        bool is_nullable = false;
+        auto ret_null_map = ColumnUInt8::create(0, 0);
+
+        std::vector<ColumnPtr> column_ptrs; // prevent converted column 
destruct
+        std::vector<const ColumnString*> data_columns;
+        std::vector<const ColumnUInt8*> nullmaps;
+        for (int i = 0; i < arguments.size(); i++) {
+            auto column = block.get_by_position(arguments[i]).column;
+            column_ptrs.push_back(column->convert_to_full_column_if_const());
+            const ColumnNullable* col_nullable =
+                    
check_and_get_column<ColumnNullable>(column_ptrs.back().get());
+            if (col_nullable) {
+                if (!is_nullable) {
+                    is_nullable = true;
+                    ret_null_map = ColumnUInt8::create(input_rows_count, 0);
+                }
+                const ColumnUInt8* col_nullmap = 
check_and_get_column<ColumnUInt8>(
+                        col_nullable->get_null_map_column_ptr().get());
+                nullmaps.push_back(col_nullmap);
+                const ColumnString* col = check_and_get_column<ColumnString>(
+                        col_nullable->get_nested_column_ptr().get());
+                data_columns.push_back(col);
+            } else {
+                nullmaps.push_back(nullptr);
+                data_columns.push_back(assert_cast<const 
ColumnString*>(column_ptrs.back().get()));
+            }
+        }
+
+        auto status = execute_process(
+                data_columns, 
*assert_cast<ColumnString*>(result_column.get()), input_rows_count,
+                nullmaps, is_nullable, 
*assert_cast<ColumnUInt8*>(ret_null_map));
+
+        if (UNLIKELY(status != Status::OK())) {
+            return status;
+        }
+
+        if (is_nullable) {
+            block.replace_by_position(result, 
ColumnNullable::create(std::move(result_column),
+                                                                     
std::move(ret_null_map)));
+        } else {
+            block.get_by_position(result).column = std::move(result_column);
+        }
+        return Status::OK();
+    }
+
+    Status execute_process(const std::vector<const ColumnString*>& 
data_columns,
+                           ColumnString& result_column, size_t 
input_rows_count,
+                           const std::vector<const ColumnUInt8*> nullmaps, 
bool is_nullable,
+                           ColumnUInt8& ret_null_map) {
+        std::string type_flags = 
data_columns.back()->get_data_at(0).to_string();
+
+        std::vector<rapidjson::Document> objects;
+        for (auto row = 0; row < input_rows_count; row++) {
+            objects.emplace_back(rapidjson::kNullType);
+            const auto json_doc = data_columns[0]->get_data_at(row);
+            std::string_view json_str(json_doc.data, json_doc.size);
+            objects[row].Parse(json_str.data(), json_str.size());
+            if (UNLIKELY(objects[row].HasParseError())) {
+                return Status::RuntimeError("invalid json doc function {}", 
get_name());
+            }
+        }
+
+        std::vector<std::vector<std::vector<JsonPath>>> json_paths;
+        auto status = get_parsed_path_columns(json_paths, data_columns, 
input_rows_count);
+        if (UNLIKELY(status != Status::OK())) {

Review Comment:
   Why not use the RETURN_IF_ERROR macro here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to