This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-1.1-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push: new afebb82066 [Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729) afebb82066 is described below commit afebb82066eac5439ac08afd24d8d57c1b285f8e Author: ChPi <chji...@gmail.com> AuthorDate: Mon Oct 31 08:46:46 2022 +0800 [Bug](Vectorized)fix json_object and json_array function return wrong result on vectorized engine (#13729) * [Bug][Vectorized] fix json_object and json_array function return wrong result on vectorized engine Co-authored-by: chenjie <chen...@cecdat.com> --- be/src/vec/functions/function_json.cpp | 92 ++++++++++++++++++---- gensrc/script/doris_builtins_functions.py | 4 +- .../json_function/test_query_json_array.out | 15 ++++ .../json_function/test_query_json_object.out | 20 +++-- ..._object.groovy => test_query_json_array.groovy} | 31 +++++--- .../json_function/test_query_json_object.groovy | 27 ++++--- 6 files changed, 141 insertions(+), 48 deletions(-) diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 2019ac5910..1c88ecc902 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -415,22 +415,28 @@ struct FunctionJsonArrayImpl { static void execute_parse(const std::string& type_flags, const std::vector<const ColumnString*>& data_columns, std::vector<rapidjson::Value>& objects, - rapidjson::Document::AllocatorType& allocator) { + rapidjson::Document::AllocatorType& allocator, + const std::vector<const ColumnUInt8*>& nullmaps) { for (int i = 0; i < data_columns.size() - 1; i++) { constexpr_loop_match<'0', '6', JsonParser>(type_flags[i], objects, allocator, - data_columns[i]); + data_columns[i], nullmaps[i]); } } template <typename TypeImpl> static void execute_type(std::vector<rapidjson::Value>& objects, rapidjson::Document::AllocatorType& allocator, - const ColumnString* data_column) { + const ColumnString* data_column, const ColumnUInt8* nullmap) { StringParser::ParseResult result; rapidjson::Value value; for (int i = 0; i < objects.size(); i++) { - TypeImpl::update_value(result, value, data_column->get_data_at(i), allocator); + if (nullmap != nullptr && nullmap->get_data()[i]) { + JsonParser<'0'>::update_value(result, value, data_column->get_data_at(i), + allocator); + } else { + TypeImpl::update_value(result, value, data_column->get_data_at(i), allocator); + } objects[i].PushBack(value, allocator); } } @@ -444,40 +450,96 @@ struct FunctionJsonObjectImpl { static void execute_parse(std::string type_flags, const std::vector<const ColumnString*>& data_columns, std::vector<rapidjson::Value>& objects, - rapidjson::Document::AllocatorType& allocator) { + rapidjson::Document::AllocatorType& allocator, + const std::vector<const ColumnUInt8*>& nullmaps) { for (auto& array_object : objects) { array_object.SetObject(); } - for (int i = 0; i + 1 < data_columns.size() - 1; i += 2) { constexpr_loop_match<'0', '6', JsonParser>(type_flags[i + 1], objects, allocator, - data_columns[i], data_columns[i + 1]); + data_columns[i], data_columns[i + 1], + nullmaps[i + 1]); } } template <typename TypeImpl> static void execute_type(std::vector<rapidjson::Value>& objects, rapidjson::Document::AllocatorType& allocator, - const ColumnString* key_column, const ColumnString* value_column) { + const ColumnString* key_column, const ColumnString* value_column, + const ColumnUInt8* nullmap) { StringParser::ParseResult result; rapidjson::Value key; rapidjson::Value value; - for (int i = 0; i < objects.size(); i++) { JsonParser<'4'>::update_value(result, key, key_column->get_data_at(i), allocator); // key always is string - TypeImpl::update_value(result, value, value_column->get_data_at(i), allocator); + if (nullmap != nullptr && nullmap->get_data()[i]) { + JsonParser<'0'>::update_value(result, value, value_column->get_data_at(i), + allocator); + } else { + TypeImpl::update_value(result, value, value_column->get_data_at(i), allocator); + } objects[i].AddMember(key, value, allocator); } } }; template <typename SpecificImpl> -struct FunctionJsonImpl { +class FunctionJsonAlwaysNotNullable : public IFunction { +public: static constexpr auto name = SpecificImpl::name; + static FunctionPtr create() { + return std::make_shared<FunctionJsonAlwaysNotNullable<SpecificImpl>>(); + } + + bool use_default_implementation_for_nulls() const override { return false; } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 0; } + + bool is_variadic() const override { return true; } + + bool use_default_implementation_for_constants() const override { return true; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared<DataTypeString>(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto result_column = ColumnString::create(); + + std::vector<ColumnPtr> column_ptrs; // prevent converted column destruct + std::vector<const ColumnString*> data_columns; + std::vector<const ColumnUInt8*> nullmaps; + for (int i = 0; i < arguments.size(); i++) { + auto column = block.get_by_position(arguments[i]).column; + column_ptrs.push_back(column->convert_to_full_column_if_const()); + const ColumnNullable* col_nullable = + check_and_get_column<ColumnNullable>(column_ptrs.back().get()); + if (col_nullable) { + const ColumnUInt8* col_nullmap = check_and_get_column<ColumnUInt8>( + col_nullable->get_null_map_column_ptr().get()); + nullmaps.push_back(col_nullmap); + const ColumnString* col = check_and_get_column<ColumnString>( + col_nullable->get_nested_column_ptr().get()); + data_columns.push_back(col); + } else { + nullmaps.push_back(nullptr); + data_columns.push_back(assert_cast<const ColumnString*>(column_ptrs.back().get())); + } + } + execute(data_columns, *assert_cast<ColumnString*>(result_column.get()), input_rows_count, + nullmaps); + block.get_by_position(result).column = std::move(result_column); + return Status::OK(); + } + static void execute(const std::vector<const ColumnString*>& data_columns, - ColumnString& result_column, size_t input_rows_count) { + ColumnString& result_column, size_t input_rows_count, + const std::vector<const ColumnUInt8*> nullmaps) { std::string type_flags = data_columns.back()->get_data_at(0).to_string(); rapidjson::Document document; @@ -488,7 +550,7 @@ struct FunctionJsonImpl { objects.emplace_back(rapidjson::kArrayType); } - SpecificImpl::execute_parse(type_flags, data_columns, objects, allocator); + SpecificImpl::execute_parse(type_flags, data_columns, objects, allocator, nullmaps); rapidjson::StringBuffer buf; rapidjson::Writer<rapidjson::StringBuffer> writer(buf); @@ -572,8 +634,8 @@ void register_function_json(SimpleFunctionFactory& factory) { factory.register_function<FunctionGetJsonDouble>(); factory.register_function<FunctionGetJsonString>(); - factory.register_function<FunctionJson<FunctionJsonImpl<FunctionJsonArrayImpl>>>(); - factory.register_function<FunctionJson<FunctionJsonImpl<FunctionJsonObjectImpl>>>(); + factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonArrayImpl>>(); + factory.register_function<FunctionJsonAlwaysNotNullable<FunctionJsonObjectImpl>>(); factory.register_function<FunctionJson<FunctionJsonQuoteImpl>>(); } diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index b758c00ead..ab0353f826 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1111,10 +1111,10 @@ visible_functions = [ [['json_array'], 'VARCHAR', ['VARCHAR', '...'], '_ZN5doris13JsonFunctions10json_arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', - '', '', 'vec', ''], + '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], [['json_object'], 'VARCHAR', ['VARCHAR', '...'], '_ZN5doris13JsonFunctions11json_objectEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', - '', '', 'vec', ''], + '', '', 'vec', 'ALWAYS_NOT_NULLABLE'], [['json_quote'], 'VARCHAR', ['VARCHAR'], '_ZN5doris13JsonFunctions10json_quoteEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], diff --git a/regression-test/data/query/sql_functions/json_function/test_query_json_array.out b/regression-test/data/query/sql_functions/json_function/test_query_json_array.out new file mode 100644 index 0000000000..bc6c2d0e7f --- /dev/null +++ b/regression-test/data/query/sql_functions/json_function/test_query_json_array.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +["k0",1,"k1",null,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"] +["k0",2,"k1",1,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"] +["k0",3,"k1",null,"k2",true,"k3",null,"k4",null,"k5",null,"k6","k6"] +["k0",4,"k1",null,"k2",null,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"] +["k0",5,"k1",1,"k2",true,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"] + +-- !sql2 -- +["k0",1,"k1",null,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"] +["k0",2,"k1",1,"k2",null,"k3",null,"k4",null,"k5",null,"k6","k6"] +["k0",3,"k1",null,"k2",true,"k3",null,"k4",null,"k5",null,"k6","k6"] +["k0",4,"k1",null,"k2",null,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"] +["k0",5,"k1",1,"k2",true,"k3","test","k4","2022-01-01 11:11:11","k5",null,"k6","k6"] + diff --git a/regression-test/data/query/sql_functions/json_function/test_query_json_object.out b/regression-test/data/query/sql_functions/json_function/test_query_json_object.out index ee1dd4abd8..f270e8484e 100644 --- a/regression-test/data/query/sql_functions/json_function/test_query_json_object.out +++ b/regression-test/data/query/sql_functions/json_function/test_query_json_object.out @@ -1,9 +1,15 @@ -- This file is automatically generated. You should know what you did if you want to edit this --- !sql -- -{"k1":null} -{"k1":null} -{"k1":null} -{"k1":null} -{"k1":null} -{"k1":1} +-- !sql1 -- +{"k0":1,"k1":null,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"} +{"k0":2,"k1":1,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"} +{"k0":3,"k1":null,"k2":true,"k3":null,"k4":null,"k5":null,"k6":"k6"} +{"k0":4,"k1":null,"k2":null,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"} +{"k0":5,"k1":1,"k2":true,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"} + +-- !sql2 -- +{"k0":1,"k1":null,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"} +{"k0":2,"k1":1,"k2":null,"k3":null,"k4":null,"k5":null,"k6":"k6"} +{"k0":3,"k1":null,"k2":true,"k3":null,"k4":null,"k5":null,"k6":"k6"} +{"k0":4,"k1":null,"k2":null,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"} +{"k0":5,"k1":1,"k2":true,"k3":"test","k4":"2022-01-01 11:11:11","k5":null,"k6":"k6"} diff --git a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy b/regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy similarity index 53% copy from regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy copy to regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy index 924bffcc75..6a4ef939a3 100644 --- a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy +++ b/regression-test/suites/query/sql_functions/json_function/test_query_json_array.groovy @@ -15,29 +15,34 @@ // specific language governing permissions and limitations // under the License. -suite("test_query_json_object", "query") { +suite("test_query_json_array", "query") { sql "set enable_vectorized_engine = false;" - def tableName = "test_query_json_object" + def tableName = "test_query_json_array" sql "DROP TABLE IF EXISTS ${tableName}" sql """ - CREATE TABLE `${tableName}` ( - `k1` int(11) NULL COMMENT "user id" + CREATE TABLE ${tableName} ( + `k0` int(11) not null, + `k1` int(11) NULL, + `k2` boolean NULL, + `k3` varchar(255), + `k4` datetime ) ENGINE=OLAP - DUPLICATE KEY(`k1`) + DUPLICATE KEY(`k0`,`k1`,`k2`,`k3`,`k4`) COMMENT "OLAP" - DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + DISTRIBUTED BY HASH(`k0`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "in_memory" = "false", "storage_format" = "V2" ); """ - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(1);" - qt_sql "select json_object(\"k1\",k1) from ${tableName};" + sql "insert into ${tableName} values(1,null,null,null,null);" + sql "insert into ${tableName} values(2,1,null,null,null);" + sql "insert into ${tableName} values(3,null,true,null,null);" + sql "insert into ${tableName} values(4,null,null,'test','2022-01-01 11:11:11');" + sql "insert into ${tableName} values(5,1,true,'test','2022-01-01 11:11:11');" + qt_sql1 "select json_array('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};" + sql "set enable_vectorized_engine = true;" + qt_sql2 "select json_array('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};" sql "DROP TABLE ${tableName};" } diff --git a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy b/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy index 924bffcc75..1f35bdf151 100644 --- a/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy +++ b/regression-test/suites/query/sql_functions/json_function/test_query_json_object.groovy @@ -20,24 +20,29 @@ suite("test_query_json_object", "query") { def tableName = "test_query_json_object" sql "DROP TABLE IF EXISTS ${tableName}" sql """ - CREATE TABLE `${tableName}` ( - `k1` int(11) NULL COMMENT "user id" + CREATE TABLE ${tableName} ( + `k0` int(11) not null, + `k1` int(11) NULL, + `k2` boolean NULL, + `k3` varchar(255), + `k4` datetime ) ENGINE=OLAP - DUPLICATE KEY(`k1`) + DUPLICATE KEY(`k0`,`k1`,`k2`,`k3`,`k4`) COMMENT "OLAP" - DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + DISTRIBUTED BY HASH(`k0`) BUCKETS 1 PROPERTIES ( "replication_allocation" = "tag.location.default: 1", "in_memory" = "false", "storage_format" = "V2" ); """ - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(null);" - sql "insert into ${tableName} values(1);" - qt_sql "select json_object(\"k1\",k1) from ${tableName};" + sql "insert into ${tableName} values(1,null,null,null,null);" + sql "insert into ${tableName} values(2,1,null,null,null);" + sql "insert into ${tableName} values(3,null,true,null,null);" + sql "insert into ${tableName} values(4,null,null,'test','2022-01-01 11:11:11');" + sql "insert into ${tableName} values(5,1,true,'test','2022-01-01 11:11:11');" + qt_sql1 "select json_object('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};" + sql "set enable_vectorized_engine = true;" + qt_sql2 "select json_object('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4,'k5', null,'k6','k6') from ${tableName};" sql "DROP TABLE ${tableName};" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org