This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 973f2878ba4 [fix](simdjson) fix simdjson reader for read json object 
array when jsonroot set #38490 (#38500)
973f2878ba4 is described below

commit 973f2878ba4ae53dedcbcc2e4dee2852581af72c
Author: amory <wangqian...@selectdb.com>
AuthorDate: Tue Jul 30 15:51:15 2024 +0800

    [fix](simdjson) fix simdjson reader for read json object array when 
jsonroot set #38490 (#38500)
---
 be/src/vec/exec/format/json/new_json_reader.cpp    |   3 +-
 be/src/vec/exec/format/json/new_json_reader.h      |   1 +
 .../load_p0/stream_load/load_object_array_json.out |   4 +
 .../load_p0/stream_load/test_json_object_array.csv |   1 +
 .../stream_load/load_object_array_json.groovy      | 100 +++++++++++++++++++++
 5 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index 0ef75a8ebfe..e3ed69b150e 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1251,7 +1251,7 @@ Status 
NewJsonReader::_simdjson_handle_flat_array_complex_json(
             bool valid = true;
             cur = (*_array_iter).get_object();
             // extract root
-            if (_parsed_json_root.size() != 0) {
+            if (!_parsed_from_json_root && _parsed_json_root.size() != 0) {
                 simdjson::ondemand::value val;
                 Status st = JsonFunctions::extract_from_object(cur, 
_parsed_json_root, &val);
                 if (UNLIKELY(!st.ok())) {
@@ -1668,6 +1668,7 @@ Status NewJsonReader::_simdjson_parse_json_doc(size_t* 
size, bool* eof) {
                 fmt::format_to(error_msg, "{}", st.to_string());
                 return return_quality_error(error_msg, 
std::string((char*)json_str, *size));
             }
+            _parsed_from_json_root = true;
         } catch (simdjson::simdjson_error& e) {
             fmt::memory_buffer error_msg;
             fmt::format_to(error_msg, "Encounter error while 
extract_from_object, error: {}",
diff --git a/be/src/vec/exec/format/json/new_json_reader.h 
b/be/src/vec/exec/format/json/new_json_reader.h
index 9d4696a6422..6ad33fb4ffb 100644
--- a/be/src/vec/exec/format/json/new_json_reader.h
+++ b/be/src/vec/exec/format/json/new_json_reader.h
@@ -223,6 +223,7 @@ private:
 
     std::vector<std::vector<JsonPath>> _parsed_jsonpaths;
     std::vector<JsonPath> _parsed_json_root;
+    bool _parsed_from_json_root = false; // to avoid parsing json root 
multiple times
 
     char _value_buffer[4 * 1024 * 1024]; // 4MB
     char _parse_buffer[512 * 1024];      // 512KB
diff --git 
a/regression-test/data/load_p0/stream_load/load_object_array_json.out 
b/regression-test/data/load_p0/stream_load/load_object_array_json.out
new file mode 100644
index 00000000000..53598f37e7d
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/load_object_array_json.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+1021021338780262401    1021021338700570624     0       239.0000        
876219500005C31942      0       0.0000  128.0000        1       
2024-07-19T11:34:17     239.0000        0       0.0000
+
diff --git 
a/regression-test/data/load_p0/stream_load/test_json_object_array.csv 
b/regression-test/data/load_p0/stream_load/test_json_object_array.csv
new file mode 100644
index 00000000000..63fc1b5d460
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_json_object_array.csv
@@ -0,0 +1 @@
+{"data":[{"id":"1021021338780262401","type":"INSERT","owner_id":"0","amount_tag":"239.0","barcode":"876219500005C31942","retail_order_bill_id":"1021021338700570624","status":"0","amount_retail":"0.0","amount":"128.0","qty":"1","timestamp":"2024-07-19
 
11:34:17","price_cost":"239.0","is_gift":"0","amount_discount":"0.0"}],"type":"INSERT"}
diff --git 
a/regression-test/suites/load_p0/stream_load/load_object_array_json.groovy 
b/regression-test/suites/load_p0/stream_load/load_object_array_json.groovy
new file mode 100644
index 00000000000..aeb7f9297cc
--- /dev/null
+++ b/regression-test/suites/load_p0/stream_load/load_object_array_json.groovy
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("load_object_array_json", "p0") {
+    // define a sql table
+    def testTable = "load_object_array_json"
+
+    def create_test_table = {
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+              `id` bigint(20) NOT NULL COMMENT "",
+                `retail_order_bill_id` bigint(20) NULL COMMENT "",
+                `owner_id` int(11) NULL COMMENT "",
+                `amount_tag` decimal(12, 4) NULL COMMENT "",
+                `barcode` varchar(128) NULL COMMENT "",
+                `status` int(11) NULL COMMENT "",
+                `amount_retail` decimal(12, 4) NULL COMMENT "",
+                `amount` decimal(12, 4) NULL COMMENT "",
+                `qty` int(11) NULL COMMENT "",
+                `timestamp` datetime NULL COMMENT "时间戳",
+                `price_cost` decimal(12, 4) NULL COMMENT "",
+                `is_gift` int(11) NULL COMMENT "",
+                `amount_discount` decimal(12, 4) NULL COMMENT "",
+            ) ENGINE=OLAP
+            UNIQUE KEY(`id`, `retail_order_bill_id`)
+            DISTRIBUTED BY HASH(`retail_order_bill_id`) BUCKETS 10
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            )
+            """
+    }
+
+    def load_data = {table_name, file_name ->
+        // load the json data
+        streamLoad {
+            table table_name
+
+            // set http request header params
+            set 'strip_outer_array', 'true'
+            set 'read_json_by_line', 'true'
+            set 'format', 'json'
+            set 'columns', 
'id,owner_id,amount_tag,barcode,retail_order_bill_id,status,amount_retail,amount,qty,timestamp,price_cost,is_gift,amount_discount'
+            set 'jsonpaths', 
'[\"$.id\",\"$.owner_id\",\"$.amount_tag\",\"$.barcode\",\"$.retail_order_bill_id\",\"$.status\",\"$.amount_retail\",\"$.amount\",\"$.qty\",\"$.timestamp\",\"$.price_cost\",\"$.is_gift\",\"$.amount_discount\"]'
+            set 'json_root', '$.data'
+            set 'fuzzy_parse', 'false'
+            set 'max_filter_ratio', '1'
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+
+            // if declared a check callback, the default check condition will 
ignore.
+            // So you must check all condition
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(json.NumberTotalRows, json.NumberLoadedRows + 
json.NumberUnselectedRows
+                             + json.NumberFilteredRows)
+                assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+            }
+        }
+    }
+
+    def check_data_correct = {table_name ->
+        sql "sync"
+        // select the table and check whether the data is correct
+        qt_select "select * from ${table_name} order by id"
+    }
+
+    // case1: import array data in json format and enable vectorized engine
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        create_test_table.call()
+
+        load_data.call(testTable, 'test_json_object_array.csv')
+
+        check_data_correct(testTable)
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to