This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3290eedb216d1741d1db78ebc22a1b1660cf3600 Author: lihangyu <15605149...@163.com> AuthorDate: Fri Jul 21 17:02:01 2023 +0800 [Fix](json reader) fix rapidjson `array->PushBack` may take ownership… (#21988) With the following JSON paths: `["$.data","$.data.datatimestamp"]`, after `array_obj->PushBack` the ownership of the `data` value is transferred to `array_obj`, which leads to null values for the JSON path `$.data.datatimestamp`. Rapidjson doc: ``` //! Append a GenericValue at the end of the array. \note The ownership of \c value will be transferred to this array on success. */ GenericValue& PushBack(GenericValue& value, Allocator& allocator); ``` --- be/src/exprs/json_functions.cpp | 6 +++-- .../data/load_p0/stream_load/test_json_load.out | 3 +++ .../data/load_p0/stream_load/with_jsonpath.json | 1 + .../load_p0/stream_load/test_json_load.groovy | 27 ++++++++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 98756a4d8b..fd69d822f1 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -395,11 +395,13 @@ rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( rapidjson::Value* root = match_value(parsed_paths, document, mem_allocator, true); if (root == nullptr || root == document) { // not found return nullptr; - } else if (!root->IsArray()) { + } else if (!root->IsArray() && wrap_explicitly) { rapidjson::Value* array_obj = nullptr; array_obj = static_cast<rapidjson::Value*>(mem_allocator.Malloc(sizeof(rapidjson::Value))); array_obj->SetArray(); - array_obj->PushBack(*root, mem_allocator); + rapidjson::Value copy; + copy.CopyFrom(*root, mem_allocator); + array_obj->PushBack(std::move(copy), mem_allocator); // set `wrap_explicitly` to true, so that the caller knows that this Array is wrapped actively. 
*wrap_explicitly = true; return array_obj; diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out b/regression-test/data/load_p0/stream_load/test_json_load.out index 29cbc7998f..3434f4fb4f 100644 --- a/regression-test/data/load_p0/stream_load/test_json_load.out +++ b/regression-test/data/load_p0/stream_load/test_json_load.out @@ -334,3 +334,6 @@ 2 shanghai 2345672 200 changsha 3456789 +-- !select22 -- +11324 1321313082437 1678834024274 20230315 {"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heart [...] + diff --git a/regression-test/data/load_p0/stream_load/with_jsonpath.json b/regression-test/data/load_p0/stream_load/with_jsonpath.json new file mode 100644 index 0000000000..11d14310af --- /dev/null +++ b/regression-test/data/load_p0/stream_load/with_jsonpath.json @@ -0,0 +1 @@ +{"data":{"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heartrate":51,"tsltype":"properties","da [...] 
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy b/regression-test/suites/load_p0/stream_load/test_json_load.groovy index 7ae1221454..b4501bec9f 100644 --- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy +++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy @@ -684,6 +684,33 @@ suite("test_json_load", "p0") { try_sql("DROP TABLE IF EXISTS ${testTable}") } + // case22: nested and it's member with jsonpath + try { + testTable = "test_json_load" + sql "DROP TABLE IF EXISTS ${testTable}" + sql """CREATE TABLE IF NOT EXISTS ${testTable} + ( + `productid` bigint NOT NULL COMMENT "productid", + `deviceid` bigint NOT NULL COMMENT "deviceid", + `datatimestamp` string NULL COMMENT "datatimestamp", + `dt` int NULL COMMENT "dt", + `data` string + ) + DUPLICATE KEY(`productid`, `deviceid`) + DISTRIBUTED BY RANDOM BUCKETS auto + properties( + "replication_num" = "1" + ); + """ +1 + load_json_data.call("${testTable}", 'with_jsonpath', '', 'true', 'json', """productid, deviceid, data, datatimestamp, dt=from_unixtime(substr(datatimestamp,1,10),'%Y%m%d')""", + '["$.productid","$.deviceid","$.data","$.data.datatimestamp"]', '', '', '', 'with_jsonpath.json') + qt_select22 "select * from ${testTable}" + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + // if 'enableHdfs' in regression-conf.groovy has been set to true, // the test will run these case as below. if (enableHdfs()) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org