This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git

The following commit(s) were added to refs/heads/master by this push:
     new 63314b84d25  [bug](function)fix json_replace check return type error (#37014)
63314b84d25 is described below

commit 63314b84d25a47e1a0de7f243ae152cf68ea1abb
Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com>
AuthorDate: Fri Jul 5 18:43:06 2024 +0800

    [bug](function)fix json_replace check return type error (#37014)

    1. Fix the return-type DCHECK error: per the patch below, get_return_type_impl() now derives
       nullability from all arguments rather than only the value arguments, so the BE-computed
       return type no longer disagrees with the declared type and fails the check:
    ```
    mysql [test]>select (json_replace(a, '$.fparam.nested_2', "qwe")) from json_table_2 limit 1;
    ERROR 1105 (HY000): errCode = 2, detailMessage = (10.16.10.8)[INTERNAL_ERROR]Function json_replace get failed, expr is VectorizedFnCall[json_replace](arguments=a, String, String, String,return=Nullable(String)) and return type is Nullable(String).
    ```
    2. Speed up execution of json_replace/json_insert/json_set by reading const arguments in place
       instead of converting them to full columns; in tests this was about 1s faster on a table
       with 10 million (1000w) rows.
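For context on item 2: a const argument, such as a literal JSON path, physically stores a single value, so the patch indexes it at row 0 via index_check_const()/unpack_if_const() instead of expanding it to input_rows_count copies with convert_to_full_column_if_const(). The snippet below is a minimal, self-contained sketch of that access pattern only; FakeStringColumn and the driver are illustrative stand-ins, not Doris types or the committed code.

```cpp
#include <cstddef>
#include <string>
#include <vector>

// Sketch of the helper the patch relies on: for a const column only element 0
// exists, so every row is redirected to index 0; normal columns index by row.
inline std::size_t index_check_const(std::size_t row, bool is_const) {
    return is_const ? 0 : row;
}

// Illustrative stand-in for ColumnString (not a Doris type).
struct FakeStringColumn {
    std::vector<std::string> data;
    const std::string& get_data_at(std::size_t i) const { return data[i]; }
};

int main() {
    const std::size_t input_rows_count = 4;
    // A literal argument such as '$.id' arrives as a const column: one stored value.
    const FakeStringColumn path_col{{"$.id"}};
    const bool path_is_const = true;

    for (std::size_t row = 0; row < input_rows_count; ++row) {
        // Same access pattern used in get_parsed_path_columns()/execute_type() below:
        // no per-row copy of the const value, just a redirected index.
        const std::string& path =
                path_col.get_data_at(index_check_const(row, path_is_const));
        (void)path; // the real code parses the JSON path here
    }
    return 0;
}
```

The patch applies the same trick per argument, with unpack_if_const() recording in column_is_consts which inputs were const.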
---
 be/src/vec/functions/function_json.cpp            | 75 +++++++++++++---------
 .../json_function/test_query_json_replace.out     |  5 ++
 .../json_function/test_query_json_replace.groovy  | 23 +++++++
 3 files changed, 74 insertions(+), 29 deletions(-)

diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp
index 2faeb24d514..cb667d2dc76 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -1346,11 +1346,13 @@ private:
     Status get_parsed_path_columns(std::vector<std::vector<std::vector<JsonPath>>>& json_paths,
                                    const std::vector<const ColumnString*>& data_columns,
-                                   size_t input_rows_count) const {
+                                   size_t input_rows_count,
+                                   std::vector<bool>& column_is_consts) const {
         for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) {
             json_paths.emplace_back(std::vector<std::vector<JsonPath>>());
             for (auto row = 0; row < input_rows_count; row++) {
-                const auto path = data_columns[col]->get_data_at(row);
+                const auto path = data_columns[col]->get_data_at(
+                        index_check_const(row, column_is_consts[col]));
                 std::string_view path_string(path.data, path.size);
                 std::vector<JsonPath> parsed_paths;
 
@@ -1384,7 +1386,7 @@ public:
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
         bool is_nullable = false;
         // arguments: (json_str, path, val[, path, val...], type_flag)
-        for (auto col = 2; col < arguments.size() - 1; col += 2) {
+        for (auto col = 0; col < arguments.size() - 1; col += 1) {
             if (arguments[col]->is_nullable()) {
                 is_nullable = true;
                 break;
@@ -1398,36 +1400,42 @@ public:
                        size_t result, size_t input_rows_count) const override {
         auto result_column = ColumnString::create();
         bool is_nullable = false;
-        auto ret_null_map = ColumnUInt8::create(0, 0);
+        ColumnUInt8::MutablePtr ret_null_map = nullptr;
+        ColumnUInt8::Container* ret_null_map_data = nullptr;
 
-        std::vector<ColumnPtr> column_ptrs; // prevent converted column destruct
         std::vector<const ColumnString*> data_columns;
         std::vector<const ColumnUInt8*> nullmaps;
+        std::vector<bool> column_is_consts;
         for (int i = 0; i < arguments.size(); i++) {
-            auto column = block.get_by_position(arguments[i]).column;
-            column_ptrs.push_back(column->convert_to_full_column_if_const());
-            const ColumnNullable* col_nullable =
-                    check_and_get_column<ColumnNullable>(column_ptrs.back().get());
+            ColumnPtr arg_col;
+            bool arg_const;
+            std::tie(arg_col, arg_const) =
+                    unpack_if_const(block.get_by_position(arguments[i]).column);
+            const auto* col_nullable = check_and_get_column<ColumnNullable>(arg_col.get());
+            column_is_consts.push_back(arg_const);
             if (col_nullable) {
                 if (!is_nullable) {
                     is_nullable = true;
-                    ret_null_map = ColumnUInt8::create(input_rows_count, 0);
                 }
-                const ColumnUInt8* col_nullmap = check_and_get_column<ColumnUInt8>(
+                const ColumnUInt8* col_nullmap = assert_cast<const ColumnUInt8*>(
                         col_nullable->get_null_map_column_ptr().get());
                 nullmaps.push_back(col_nullmap);
-                const ColumnString* col = check_and_get_column<ColumnString>(
+                const ColumnString* col = assert_cast<const ColumnString*>(
                         col_nullable->get_nested_column_ptr().get());
                 data_columns.push_back(col);
             } else {
                 nullmaps.push_back(nullptr);
-                data_columns.push_back(assert_cast<const ColumnString*>(column_ptrs.back().get()));
+                data_columns.push_back(assert_cast<const ColumnString*>(arg_col.get()));
             }
         }
-
+        //*assert_cast<ColumnUInt8*>(ret_null_map.get())
+        if (is_nullable) {
+            ret_null_map = ColumnUInt8::create(input_rows_count, 0);
+            ret_null_map_data = &(ret_null_map->get_data());
+        }
         RETURN_IF_ERROR(execute_process(
                 data_columns, *assert_cast<ColumnString*>(result_column.get()), input_rows_count,
-                nullmaps, is_nullable, *assert_cast<ColumnUInt8*>(ret_null_map.get())));
+                nullmaps, is_nullable, ret_null_map_data, column_is_consts));
 
         if (is_nullable) {
             block.replace_by_position(result, ColumnNullable::create(std::move(result_column),
@@ -1441,13 +1449,15 @@ public:
     Status execute_process(const std::vector<const ColumnString*>& data_columns,
                            ColumnString& result_column, size_t input_rows_count,
                            const std::vector<const ColumnUInt8*> nullmaps, bool is_nullable,
-                           ColumnUInt8& ret_null_map) const {
+                           ColumnUInt8::Container* ret_null_map_data,
+                           std::vector<bool>& column_is_consts) const {
         std::string type_flags = data_columns.back()->get_data_at(0).to_string();
 
         std::vector<rapidjson::Document> objects;
         for (auto row = 0; row < input_rows_count; row++) {
             objects.emplace_back(rapidjson::kNullType);
-            const auto json_doc = data_columns[0]->get_data_at(row);
+            const auto json_doc =
+                    data_columns[0]->get_data_at(index_check_const(row, column_is_consts[0]));
             std::string_view json_str(json_doc.data, json_doc.size);
             objects[row].Parse(json_str.data(), json_str.size());
             if (UNLIKELY(objects[row].HasParseError())) {
@@ -1457,9 +1467,10 @@ public:
         }
 
         std::vector<std::vector<std::vector<JsonPath>>> json_paths;
-        RETURN_IF_ERROR(get_parsed_path_columns(json_paths, data_columns, input_rows_count));
+        RETURN_IF_ERROR(get_parsed_path_columns(json_paths, data_columns, input_rows_count,
+                                                column_is_consts));
 
-        execute_parse(type_flags, data_columns, objects, json_paths, nullmaps);
+        execute_parse(type_flags, data_columns, objects, json_paths, nullmaps, column_is_consts);
 
         rapidjson::StringBuffer buf;
         rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
@@ -1469,7 +1480,7 @@ public:
             writer.Reset(buf);
             objects[i].Accept(writer);
             if (is_nullable && objects[i].IsNull()) {
-                ret_null_map.get_data()[i] = 1;
+                (*ret_null_map_data)[i] = 1;
             }
             result_column.insert_data(buf.GetString(), buf.GetSize());
         }
@@ -1483,11 +1494,12 @@ public:
                               const std::vector<const ColumnString*>& data_columns,
                               std::vector<rapidjson::Document>& objects,
                               std::vector<std::vector<std::vector<JsonPath>>>& json_paths,
-                              const std::vector<const ColumnUInt8*>& nullmaps) {
+                              const std::vector<const ColumnUInt8*>& nullmaps,
+                              std::vector<bool>& column_is_consts) {
         for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) {
-            constexpr_int_match<'0', '6', Reducer>::run(type_flags[col + 1], objects,
-                                                        json_paths[col / 2], data_columns[col + 1],
-                                                        nullmaps[col + 1]);
+            constexpr_int_match<'0', '6', Reducer>::run(
+                    type_flags[col + 1], objects, json_paths[col / 2], data_columns[col + 1],
+                    nullmaps[col + 1], column_is_consts[col + 1]);
         }
     }
 
@@ -1569,18 +1581,23 @@ public:
     template <typename TypeImpl>
     static void execute_type(std::vector<rapidjson::Document>& objects,
                              std::vector<std::vector<JsonPath>>& paths_column,
-                             const ColumnString* value_column, const ColumnUInt8* nullmap) {
+                             const ColumnString* value_column, const ColumnUInt8* nullmap,
+                             bool column_is_const) {
         StringParser::ParseResult result;
         rapidjson::Value value;
 
         for (auto row = 0; row < objects.size(); row++) {
             std::vector<JsonPath>* parsed_paths = &paths_column[row];
 
             if (nullmap != nullptr && nullmap->get_data()[row]) {
-                JsonParser<'0'>::update_value(result, value, value_column->get_data_at(row),
-                                              objects[row].GetAllocator());
+                JsonParser<'0'>::update_value(
+                        result, value,
+                        value_column->get_data_at(index_check_const(row, column_is_const)),
+                        objects[row].GetAllocator());
             } else {
-                TypeImpl::update_value(result, value, value_column->get_data_at(row),
-                                       objects[row].GetAllocator());
+                TypeImpl::update_value(
+                        result, value,
+                        value_column->get_data_at(index_check_const(row, column_is_const)),
+                        objects[row].GetAllocator());
             }
 
             switch (Kind::modify_type) {
diff --git a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out
index 408aa2ecb77..3ba95483878 100644
--- a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out
+++ b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out
@@ -11,3 +11,8 @@ null
 {"id":3,"time":null,"a1":[1,9],"a2":[1,2]}
 {"id":4,"time":null,"a1":[1,null],"a2":[1,2]}
 
+-- !sql2 --
+{"id":1,"time":"2022-01-01 11:45:14","a1":[1,9],"a2":[1,2]}
+{"id":2,"time":"2022-01-01 11:45:14","a1":[1,null],"a2":[1,2]}
+{"id":3,"time":null,"a1":[1,9],"a2":[1,2]}
+
diff --git a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy
index bdf26abecb4..a870169d962 100644
--- a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy
+++ b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy
@@ -42,4 +42,27 @@ suite("test_query_json_replace", "query") {
     sql "insert into ${tableName} values(4,null,null);"
     qt_sql1 "select json_replace('{\"id\": 0, \"time\": \"1970-01-01 00:00:00\", \"a1\": [1, 2], \"a2\": [1, 2]}', '\$.id', id, '\$.time', time, '\$.a1[1]', k, '\$.a2[3]', k) from ${tableName} order by id;"
     sql "DROP TABLE ${tableName};"
+
+    sql "DROP TABLE IF EXISTS test_query_json_replace_nullable"
+    sql """
+        CREATE TABLE test_query_json_replace_nullable (
+          `id` int(11) null,
+          `time` datetime,
+          `k` int(11)
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`,`time`,`k`)
+        COMMENT "OLAP"
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "in_memory" = "false",
+        "storage_format" = "V2"
+        );
+    """
+    sql "insert into test_query_json_replace_nullable values(1,'2022-01-01 11:45:14',9);"
+    sql "insert into test_query_json_replace_nullable values(2,'2022-01-01 11:45:14',null);"
+    sql "insert into test_query_json_replace_nullable values(3,null,9);"
+    qt_sql2 "select json_replace('{\"id\": 0, \"time\": \"1970-01-01 00:00:00\", \"a1\": [1, 2], \"a2\": [1, 2]}', '\$.id', id, '\$.time', time, '\$.a1[1]', k, '\$.a2[3]', k) from test_query_json_replace_nullable order by id;"
+
+    sql "DROP TABLE test_query_json_replace_nullable;"
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org