This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 63314b84d25 [bug](function)fix json_replace check return type error 
(#37014)
63314b84d25 is described below

commit 63314b84d25a47e1a0de7f243ae152cf68ea1abb
Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com>
AuthorDate: Fri Jul 5 18:43:06 2024 +0800

    [bug](function)fix json_replace check return type error (#37014)
    
    1. fix the return type dcheck error:
    ```
    mysql [test]>select (json_replace(a, '$.fparam.nested_2', "qwe")) from 
json_table_2 limit 1;
    ERROR 1105 (HY000): errCode = 2, detailMessage = 
(10.16.10.8)[INTERNAL_ERROR]Function json_replace get failed, expr is 
VectorizedFnCall[json_replace](arguments=a, String, String, 
String,return=Nullable(String)) and return type is Nullable(String).
    ```
    
    2. improve the json_replace/json_insert/json_set function execute of not
    convert const column, test about could faster 1s on 1000w table rows
---
 be/src/vec/functions/function_json.cpp             | 75 +++++++++++++---------
 .../json_function/test_query_json_replace.out      |  5 ++
 .../json_function/test_query_json_replace.groovy   | 23 +++++++
 3 files changed, 74 insertions(+), 29 deletions(-)

diff --git a/be/src/vec/functions/function_json.cpp 
b/be/src/vec/functions/function_json.cpp
index 2faeb24d514..cb667d2dc76 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -1346,11 +1346,13 @@ private:
 
     Status 
get_parsed_path_columns(std::vector<std::vector<std::vector<JsonPath>>>& 
json_paths,
                                    const std::vector<const ColumnString*>& 
data_columns,
-                                   size_t input_rows_count) const {
+                                   size_t input_rows_count,
+                                   std::vector<bool>& column_is_consts) const {
         for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) {
             json_paths.emplace_back(std::vector<std::vector<JsonPath>>());
             for (auto row = 0; row < input_rows_count; row++) {
-                const auto path = data_columns[col]->get_data_at(row);
+                const auto path = data_columns[col]->get_data_at(
+                        index_check_const(row, column_is_consts[col]));
                 std::string_view path_string(path.data, path.size);
                 std::vector<JsonPath> parsed_paths;
 
@@ -1384,7 +1386,7 @@ public:
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
         bool is_nullable = false;
         // arguments: (json_str, path, val[, path, val...], type_flag)
-        for (auto col = 2; col < arguments.size() - 1; col += 2) {
+        for (auto col = 0; col < arguments.size() - 1; col += 1) {
             if (arguments[col]->is_nullable()) {
                 is_nullable = true;
                 break;
@@ -1398,36 +1400,42 @@ public:
                         size_t result, size_t input_rows_count) const override 
{
         auto result_column = ColumnString::create();
         bool is_nullable = false;
-        auto ret_null_map = ColumnUInt8::create(0, 0);
+        ColumnUInt8::MutablePtr ret_null_map = nullptr;
+        ColumnUInt8::Container* ret_null_map_data = nullptr;
 
-        std::vector<ColumnPtr> column_ptrs; // prevent converted column 
destruct
         std::vector<const ColumnString*> data_columns;
         std::vector<const ColumnUInt8*> nullmaps;
+        std::vector<bool> column_is_consts;
         for (int i = 0; i < arguments.size(); i++) {
-            auto column = block.get_by_position(arguments[i]).column;
-            column_ptrs.push_back(column->convert_to_full_column_if_const());
-            const ColumnNullable* col_nullable =
-                    
check_and_get_column<ColumnNullable>(column_ptrs.back().get());
+            ColumnPtr arg_col;
+            bool arg_const;
+            std::tie(arg_col, arg_const) =
+                    
unpack_if_const(block.get_by_position(arguments[i]).column);
+            const auto* col_nullable = 
check_and_get_column<ColumnNullable>(arg_col.get());
+            column_is_consts.push_back(arg_const);
             if (col_nullable) {
                 if (!is_nullable) {
                     is_nullable = true;
-                    ret_null_map = ColumnUInt8::create(input_rows_count, 0);
                 }
-                const ColumnUInt8* col_nullmap = 
check_and_get_column<ColumnUInt8>(
+                const ColumnUInt8* col_nullmap = assert_cast<const 
ColumnUInt8*>(
                         col_nullable->get_null_map_column_ptr().get());
                 nullmaps.push_back(col_nullmap);
-                const ColumnString* col = check_and_get_column<ColumnString>(
+                const ColumnString* col = assert_cast<const ColumnString*>(
                         col_nullable->get_nested_column_ptr().get());
                 data_columns.push_back(col);
             } else {
                 nullmaps.push_back(nullptr);
-                data_columns.push_back(assert_cast<const 
ColumnString*>(column_ptrs.back().get()));
+                data_columns.push_back(assert_cast<const 
ColumnString*>(arg_col.get()));
             }
         }
-
+        //*assert_cast<ColumnUInt8*>(ret_null_map.get())
+        if (is_nullable) {
+            ret_null_map = ColumnUInt8::create(input_rows_count, 0);
+            ret_null_map_data = &(ret_null_map->get_data());
+        }
         RETURN_IF_ERROR(execute_process(
                 data_columns, 
*assert_cast<ColumnString*>(result_column.get()), input_rows_count,
-                nullmaps, is_nullable, 
*assert_cast<ColumnUInt8*>(ret_null_map.get())));
+                nullmaps, is_nullable, ret_null_map_data, column_is_consts));
 
         if (is_nullable) {
             block.replace_by_position(result, 
ColumnNullable::create(std::move(result_column),
@@ -1441,13 +1449,15 @@ public:
     Status execute_process(const std::vector<const ColumnString*>& 
data_columns,
                            ColumnString& result_column, size_t 
input_rows_count,
                            const std::vector<const ColumnUInt8*> nullmaps, 
bool is_nullable,
-                           ColumnUInt8& ret_null_map) const {
+                           ColumnUInt8::Container* ret_null_map_data,
+                           std::vector<bool>& column_is_consts) const {
         std::string type_flags = 
data_columns.back()->get_data_at(0).to_string();
 
         std::vector<rapidjson::Document> objects;
         for (auto row = 0; row < input_rows_count; row++) {
             objects.emplace_back(rapidjson::kNullType);
-            const auto json_doc = data_columns[0]->get_data_at(row);
+            const auto json_doc =
+                    data_columns[0]->get_data_at(index_check_const(row, 
column_is_consts[0]));
             std::string_view json_str(json_doc.data, json_doc.size);
             objects[row].Parse(json_str.data(), json_str.size());
             if (UNLIKELY(objects[row].HasParseError())) {
@@ -1457,9 +1467,10 @@ public:
         }
 
         std::vector<std::vector<std::vector<JsonPath>>> json_paths;
-        RETURN_IF_ERROR(get_parsed_path_columns(json_paths, data_columns, 
input_rows_count));
+        RETURN_IF_ERROR(get_parsed_path_columns(json_paths, data_columns, 
input_rows_count,
+                                                column_is_consts));
 
-        execute_parse(type_flags, data_columns, objects, json_paths, nullmaps);
+        execute_parse(type_flags, data_columns, objects, json_paths, nullmaps, 
column_is_consts);
 
         rapidjson::StringBuffer buf;
         rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
@@ -1469,7 +1480,7 @@ public:
             writer.Reset(buf);
             objects[i].Accept(writer);
             if (is_nullable && objects[i].IsNull()) {
-                ret_null_map.get_data()[i] = 1;
+                (*ret_null_map_data)[i] = 1;
             }
             result_column.insert_data(buf.GetString(), buf.GetSize());
         }
@@ -1483,11 +1494,12 @@ public:
                               const std::vector<const ColumnString*>& 
data_columns,
                               std::vector<rapidjson::Document>& objects,
                               std::vector<std::vector<std::vector<JsonPath>>>& 
json_paths,
-                              const std::vector<const ColumnUInt8*>& nullmaps) 
{
+                              const std::vector<const ColumnUInt8*>& nullmaps,
+                              std::vector<bool>& column_is_consts) {
         for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) {
-            constexpr_int_match<'0', '6', Reducer>::run(type_flags[col + 1], 
objects,
-                                                        json_paths[col / 2], 
data_columns[col + 1],
-                                                        nullmaps[col + 1]);
+            constexpr_int_match<'0', '6', Reducer>::run(
+                    type_flags[col + 1], objects, json_paths[col / 2], 
data_columns[col + 1],
+                    nullmaps[col + 1], column_is_consts[col + 1]);
         }
     }
 
@@ -1569,18 +1581,23 @@ public:
     template <typename TypeImpl>
     static void execute_type(std::vector<rapidjson::Document>& objects,
                              std::vector<std::vector<JsonPath>>& paths_column,
-                             const ColumnString* value_column, const 
ColumnUInt8* nullmap) {
+                             const ColumnString* value_column, const 
ColumnUInt8* nullmap,
+                             bool column_is_const) {
         StringParser::ParseResult result;
         rapidjson::Value value;
         for (auto row = 0; row < objects.size(); row++) {
             std::vector<JsonPath>* parsed_paths = &paths_column[row];
 
             if (nullmap != nullptr && nullmap->get_data()[row]) {
-                JsonParser<'0'>::update_value(result, value, 
value_column->get_data_at(row),
-                                              objects[row].GetAllocator());
+                JsonParser<'0'>::update_value(
+                        result, value,
+                        value_column->get_data_at(index_check_const(row, 
column_is_const)),
+                        objects[row].GetAllocator());
             } else {
-                TypeImpl::update_value(result, value, 
value_column->get_data_at(row),
-                                       objects[row].GetAllocator());
+                TypeImpl::update_value(
+                        result, value,
+                        value_column->get_data_at(index_check_const(row, 
column_is_const)),
+                        objects[row].GetAllocator());
             }
 
             switch (Kind::modify_type) {
diff --git 
a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out
 
b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out
index 408aa2ecb77..3ba95483878 100644
--- 
a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out
+++ 
b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out
@@ -11,3 +11,8 @@ null
 {"id":3,"time":null,"a1":[1,9],"a2":[1,2]}
 {"id":4,"time":null,"a1":[1,null],"a2":[1,2]}
 
+-- !sql2 --
+{"id":1,"time":"2022-01-01 11:45:14","a1":[1,9],"a2":[1,2]}
+{"id":2,"time":"2022-01-01 11:45:14","a1":[1,null],"a2":[1,2]}
+{"id":3,"time":null,"a1":[1,9],"a2":[1,2]}
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy
 
b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy
index bdf26abecb4..a870169d962 100644
--- 
a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy
@@ -42,4 +42,27 @@ suite("test_query_json_replace", "query") {
     sql "insert into ${tableName} values(4,null,null);"
     qt_sql1 "select json_replace('{\"id\": 0, \"time\": \"1970-01-01 
00:00:00\", \"a1\": [1, 2], \"a2\": [1, 2]}', '\$.id', id, '\$.time', time, 
'\$.a1[1]', k, '\$.a2[3]', k) from ${tableName} order by id;"
     sql "DROP TABLE ${tableName};"
+
+    sql "DROP TABLE IF EXISTS test_query_json_replace_nullable"
+    sql """
+            CREATE TABLE test_query_json_replace_nullable (
+              `id` int(11) null,
+              `time` datetime,
+              `k` int(11)
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`,`time`,`k`)
+            COMMENT "OLAP"
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "in_memory" = "false",
+            "storage_format" = "V2"
+            );
+    """
+    sql "insert into test_query_json_replace_nullable values(1,'2022-01-01 
11:45:14',9);"
+    sql "insert into test_query_json_replace_nullable values(2,'2022-01-01 
11:45:14',null);"
+    sql "insert into test_query_json_replace_nullable values(3,null,9);"
+    qt_sql2 "select json_replace('{\"id\": 0, \"time\": \"1970-01-01 
00:00:00\", \"a1\": [1, 2], \"a2\": [1, 2]}', '\$.id', id, '\$.time', time, 
'\$.a1[1]', k, '\$.a2[3]', k) from test_query_json_replace_nullable order by 
id;"
+
+    sql "DROP TABLE test_query_json_replace_nullable;"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to