This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch fix-json in repository https://gitbox.apache.org/repos/asf/doris.git
commit 1f3f3b0f613a4310a82a48ea76d6f03cbb4bcc2a Author: eldenmoon <15605149...@163.com> AuthorDate: Wed Jul 19 17:10:52 2023 +0800 [Fix](json reader) fix rapidjson `array->PushBack` may take ownership original document With the JSON paths below: `["$.data","$.data.datatimestamp"]` After `array_obj->PushBack`, ownership of the `data` value is transferred from the original document to `array_obj`, which leads to null values for the JSON path `$.data.datatimestamp` Rapidjson doc: ``` //! Append a GenericValue at the end of the array. \note The ownership of \c value will be transferred to this array on success. */ GenericValue& PushBack(GenericValue& value, Allocator& allocator); ``` --- be/src/exprs/json_functions.cpp | 6 +++-- .../data/load_p0/stream_load/test_json_load.out | 5 +++++ .../data/load_p0/stream_load/with_jsonpath.json | 1 + .../load_p0/stream_load/test_json_load.groovy | 26 ++++++++++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 107217ed4f..1dce452e34 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -172,11 +172,13 @@ rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( rapidjson::Value* root = match_value(parsed_paths, document, mem_allocator, true); if (root == nullptr || root == document) { // not found return nullptr; - } else if (!root->IsArray()) { + } else if (!root->IsArray() && wrap_explicitly) { rapidjson::Value* array_obj = nullptr; array_obj = static_cast<rapidjson::Value*>(mem_allocator.Malloc(sizeof(rapidjson::Value))); array_obj->SetArray(); - array_obj->PushBack(*root, mem_allocator); + rapidjson::Value copy; + copy.CopyFrom(*root, mem_allocator); + array_obj->PushBack(std::move(copy), mem_allocator); // set `wrap_explicitly` to true, so that the caller knows that this Array is wrapped actively. 
*wrap_explicitly = true; return array_obj; diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out b/regression-test/data/load_p0/stream_load/test_json_load.out index 6296f37099..07032bcf83 100644 --- a/regression-test/data/load_p0/stream_load/test_json_load.out +++ b/regression-test/data/load_p0/stream_load/test_json_load.out @@ -199,6 +199,11 @@ 10 hefei 23456710 200 changsha 3456789 +<<<<<<< HEAD -- !select1 -- John 30 New York {"email":"j...@example.com","phone":"+1-123-456-7890"} +======= +-- !select21 -- +11324 1321313082437 1678834024274 20230315 {"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heart [...] +>>>>>>> 7de147ac54 ([Fix](json reader) fix rapidjson `array->PushBack` may take ownership original document) diff --git a/regression-test/data/load_p0/stream_load/with_jsonpath.json b/regression-test/data/load_p0/stream_load/with_jsonpath.json new file mode 100644 index 0000000000..11d14310af --- /dev/null +++ b/regression-test/data/load_p0/stream_load/with_jsonpath.json @@ -0,0 +1 @@ +{"data":{"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heartrate":51,"tsltype":"properties","da [...] 
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy b/regression-test/suites/load_p0/stream_load/test_json_load.groovy index 7a69114d11..4933a8d52f 100644 --- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy +++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy @@ -588,6 +588,32 @@ suite("test_json_load", "p0") { try_sql("DROP TABLE IF EXISTS ${testTable}") } + // case22: nested and it's member with jsonpath + try { + sql "DROP TABLE IF EXISTS ${testTable}" + sql """CREATE TABLE IF NOT EXISTS ${testTable} + ( + `productid` bigint NOT NULL COMMENT "productid", + `deviceid` bigint NOT NULL COMMENT "deviceid", + `datatimestamp` string NULL COMMENT "datatimestamp", + `dt` int NULL COMMENT "dt", + `data` string + ) + DUPLICATE KEY(`productid`, `deviceid`) + DISTRIBUTED BY RANDOM BUCKETS auto + properties( + "replication_num" = "1" + ); + """ + + load_json_data.call('with_jsonpath', '', 'true', 'json', """productid, deviceid, data, datatimestamp, dt=from_unixtime(substr(datatimestamp,1,10),'%Y%m%d')""", + '["$.productid","$.deviceid","$.data","$.data.datatimestamp"]', '', '', '', 'with_jsonpath.json') + qt_select21 "select * from ${testTable}" + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + // if 'enableHdfs' in regression-conf.groovy has been set to true, // the test will run these case as below. if (enableHdfs()) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org