This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch fix-json
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1f3f3b0f613a4310a82a48ea76d6f03cbb4bcc2a
Author: eldenmoon <15605149...@163.com>
AuthorDate: Wed Jul 19 17:10:52 2023 +0800

    [Fix](json reader) fix rapidjson `array->PushBack` taking ownership of the original document
    
    With the JSON paths below:
    `["$.data","$.data.datatimestamp"]`
    
    After `array_obj->PushBack`, ownership of the `data` field is transferred from the original document to `array_obj`, which leads to null values for the JSON path `$.data.datatimestamp`.
    
    Rapidjson doc:
    ```
    //! Append a GenericValue at the end of the array.
      \note The ownership of \c value will be transferred to this array on 
success.
     */
    GenericValue& PushBack(GenericValue& value, Allocator& allocator);
    ```
---
 be/src/exprs/json_functions.cpp                    |  6 +++--
 .../data/load_p0/stream_load/test_json_load.out    |  5 +++++
 .../data/load_p0/stream_load/with_jsonpath.json    |  1 +
 .../load_p0/stream_load/test_json_load.groovy      | 26 ++++++++++++++++++++++
 4 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp
index 107217ed4f..1dce452e34 100644
--- a/be/src/exprs/json_functions.cpp
+++ b/be/src/exprs/json_functions.cpp
@@ -172,11 +172,13 @@ rapidjson::Value* 
JsonFunctions::get_json_array_from_parsed_json(
     rapidjson::Value* root = match_value(parsed_paths, document, 
mem_allocator, true);
     if (root == nullptr || root == document) { // not found
         return nullptr;
-    } else if (!root->IsArray()) {
+    } else if (!root->IsArray() && wrap_explicitly) {
         rapidjson::Value* array_obj = nullptr;
         array_obj = 
static_cast<rapidjson::Value*>(mem_allocator.Malloc(sizeof(rapidjson::Value)));
         array_obj->SetArray();
-        array_obj->PushBack(*root, mem_allocator);
+        rapidjson::Value copy;
+        copy.CopyFrom(*root, mem_allocator);
+        array_obj->PushBack(std::move(copy), mem_allocator);
         // set `wrap_explicitly` to true, so that the caller knows that this 
Array is wrapped actively.
         *wrap_explicitly = true;
         return array_obj;
diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out 
b/regression-test/data/load_p0/stream_load/test_json_load.out
index 6296f37099..07032bcf83 100644
--- a/regression-test/data/load_p0/stream_load/test_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_json_load.out
@@ -199,6 +199,11 @@
 10     hefei   23456710
 200    changsha        3456789
 
+<<<<<<< HEAD
 -- !select1 --
 John   30      New York        
{"email":"j...@example.com","phone":"+1-123-456-7890"}
+=======
+-- !select21 --
+11324  1321313082437   1678834024274   20230315        
{"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heart
 [...]
+>>>>>>> 7de147ac54 ([Fix](json reader) fix rapidjson `array->PushBack` may 
take ownership original document)
 
diff --git a/regression-test/data/load_p0/stream_load/with_jsonpath.json 
b/regression-test/data/load_p0/stream_load/with_jsonpath.json
new file mode 100644
index 0000000000..11d14310af
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/with_jsonpath.json
@@ -0,0 +1 @@
+{"data":{"base_mac_value_null":24,"base_1_value_respiratoryrate":11,"base_3_value_heartrate":51,"base_3_status_onoroutofbed":3,"base_null_count_circulation":84,"base_1_status_onoroutofbed":3,"base_1_value_heartrate":51,"base_3_value_respiratoryrate":11,"base_3_value_bodyactivityenergy":43652,"base_2_value_respiratoryrate":11,"base_2_value_bodyactivityenergy":28831,"base_2_status_onoroutofbed":3,"base_1_value_bodyactivityenergy":56758,"base_2_value_heartrate":51,"tsltype":"properties","da
 [...]
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy 
b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index 7a69114d11..4933a8d52f 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -588,6 +588,32 @@ suite("test_json_load", "p0") {
         try_sql("DROP TABLE IF EXISTS ${testTable}")
     } 
 
+    // case22: nested object and its member with jsonpath
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+        sql """CREATE TABLE IF NOT EXISTS ${testTable}
+        (
+         `productid` bigint NOT NULL COMMENT "productid",
+         `deviceid` bigint NOT NULL COMMENT "deviceid",
+         `datatimestamp` string  NULL COMMENT "datatimestamp",
+         `dt` int   NULL COMMENT "dt",
+         `data` string 
+        )
+        DUPLICATE KEY(`productid`, `deviceid`)
+        DISTRIBUTED BY RANDOM BUCKETS auto
+        properties(
+            "replication_num" = "1"
+        );
+        """
+
+        load_json_data.call('with_jsonpath', '', 'true', 'json', """productid, 
deviceid, data, datatimestamp, 
dt=from_unixtime(substr(datatimestamp,1,10),'%Y%m%d')""",
+                
'["$.productid","$.deviceid","$.data","$.data.datatimestamp"]', '', '', '', 
'with_jsonpath.json')
+        qt_select21 "select * from ${testTable}"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
     // if 'enableHdfs' in regression-conf.groovy has been set to true,
     // the test will run these case as below.
     if (enableHdfs()) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to