This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ef2f765e432 [Bug](json reader) object should stop processing when 
encounter error (#31159)
ef2f765e432 is described below

commit ef2f765e4324035242525deaa8321bfb71fa7f33
Author: lihangyu <15605149...@163.com>
AuthorDate: Wed Feb 21 10:13:43 2024 +0800

    [Bug](json reader) object should stop processing when encounter error 
(#31159)
    
    If DATA_QUALITY_ERROR is encountered, we should stop processing this document.
Otherwise there will be undefined behavior (UB) in simdjson.
---
 be/src/exprs/json_functions.cpp                    |  2 +-
 be/src/vec/exec/format/json/new_json_reader.cpp    |  6 ++--
 .../data/load_p0/stream_load/test_json_load.out    |  4 +++
 .../stream_load/test_malformed_json_with_path.json |  3 ++
 .../load_p0/stream_load/test_json_load.groovy      | 33 ++++++++++++++++++++--
 5 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp
index ff432c4655a..29c1596ed8f 100644
--- a/be/src/exprs/json_functions.cpp
+++ b/be/src/exprs/json_functions.cpp
@@ -261,7 +261,7 @@ Status 
JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
         const std::string& _msg = msg;                                         
             \
         if (UNLIKELY(_err)) {                                                  
             \
             if (_err == simdjson::NO_SUCH_FIELD || _err == 
simdjson::INDEX_OUT_OF_BOUNDS) { \
-                return Status::DataQualityError(                               
             \
+                return Status::NotFound<false>(                                
             \
                         fmt::format("Not found target filed, err: {}, msg: 
{}",             \
                                     simdjson::error_message(_err), _msg));     
             \
             }                                                                  
             \
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index 97affdcd0bb..514a925cba4 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1199,7 +1199,7 @@ Status 
NewJsonReader::_simdjson_handle_flat_array_complex_json_write_columns(
                 simdjson::ondemand::value val;
                 Status st = JsonFunctions::extract_from_object(cur, 
_parsed_json_root, &val);
                 if (UNLIKELY(!st.ok())) {
-                    if (st.is<DATA_QUALITY_ERROR>()) {
+                    if (st.is<NOT_FOUND>()) {
                         RETURN_IF_ERROR(_append_error_msg(nullptr, 
st.to_string(), "", nullptr));
                         ADVANCE_ROW();
                         continue;
@@ -1630,11 +1630,11 @@ Status 
NewJsonReader::_simdjson_write_columns_by_jsonpath(
         Status st;
         if (i < _parsed_jsonpaths.size()) {
             st = JsonFunctions::extract_from_object(*value, 
_parsed_jsonpaths[i], &json_value);
-            if (!st.ok() && !st.is<DATA_QUALITY_ERROR>()) {
+            if (!st.ok() && !st.is<NOT_FOUND>()) {
                 return st;
             }
         }
-        if (i >= _parsed_jsonpaths.size() || st.is<DATA_QUALITY_ERROR>()) {
+        if (i >= _parsed_jsonpaths.size() || st.is<NOT_FOUND>()) {
             // not match in jsondata, filling with default value
             RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, 
valid));
             if (!(*valid)) {
diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out 
b/regression-test/data/load_p0/stream_load/test_json_load.out
index b9250608475..7351891633d 100644
--- a/regression-test/data/load_p0/stream_load/test_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_json_load.out
@@ -241,3 +241,7 @@ John        30      New York        
{"email":"j...@example.com","phone":"+1-123-456-7890"}
 100    2345676
 200    755
 
+-- !select26 --
+android        \N      \N      \N      \N      \N
+android        \N      \N      \N      \N      \N
+
diff --git 
a/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json 
b/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json
new file mode 100644
index 00000000000..f87ebaa5d3b
--- /dev/null
+++ 
b/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json
@@ -0,0 +1,3 @@
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","event_value":"device_id":"gikj78675678","media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","device_id":"gikj78675678","media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","device_id":"gikj78675678","syscode":123,
 
"media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
\ No newline at end of file
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy 
b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index 816765b1233..f41610f3ba2 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -727,6 +727,35 @@ suite("test_json_load", "p0") {
         try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
 
+    // case27: import json with malformed json along with json path
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        sql """CREATE TABLE IF NOT EXISTS ${testTable} 
+            (
+                `syscode` VARCHAR(20)  NOT NULL COMMENT "",
+                `event_dt` DateTime NULL COMMENT "",
+                `pro_brand` VARCHAR(20)  COMMENT "",
+                `app_package`  VARCHAR(50) COMMENT "",
+                `platform` VARCHAR(20) COMMENT "",
+                `log_num`  BIGINT DEFAULT "0" COMMENT ""
+            )
+            DUPLICATE KEY(`syscode`, 
`event_dt`,`pro_brand`,`app_package`,`platform`)
+            COMMENT ''
+            DISTRIBUTED BY RANDOM BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );"""
+
+        load_json_data.call("${testTable}", "${testTable}_case27_1", 'false', 
'true', 'json', 'id= id * 10', 
'[\"$.platform\",\"$.app_package\",\"$.sysCode\",\"$.sys_code\",\"$.proBrand\",\"$.pro_brand\",\"$.event_time\"]',
+                             '', '', '', 'test_malformed_json_with_path.json', 
false, 2)
+        sql "sync"
+        qt_select26 "select * from ${testTable}"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
     // test jsonpaths error
     try {
         sql "DROP TABLE IF EXISTS ${testTable}"
@@ -734,7 +763,7 @@ suite("test_json_load", "p0") {
         create_json_test_table.call(testTable)
         streamLoad {
             table "${testTable}"
-            set 'jsonpaths', '[\"Name\", \"Age\", \"Agent_id\"]'
+            set 'jsonpaths', '[\"$.Name\", \"$.Age\", \"$.Agent_id\"]'
             set 'format', 'json'
             file 'test_json_error.json' // import json file
             time 10000 // limit inflight 10s
@@ -754,7 +783,7 @@ suite("test_json_load", "p0") {
                 def code = process.waitFor()
                 def out = process.text
                 log.info("result: ${out}".toString())
-                def reason = "Reason: There is no column matching jsonpaths in 
the json file, columns:[name, age, agent_id, ], please check columns and 
jsonpaths:[\"Name\", \"Age\", \"Agent_id\"]. src line 
[{\"name\":\"Name1\",\"age\":21,\"agent_id\":\"1\"}]; \n"
+                def reason = "Reason: There is no column matching jsonpaths in 
the json file, columns:[name, age, agent_id, ], please check columns and 
jsonpaths:[\"\$.Name\", \"\$.Age\", \"\$.Agent_id\"]. src line 
[{\"name\":\"Name1\",\"age\":21,\"agent_id\":\"1\"}]; \n"
                 assertEquals("${reason}", "${out}")
             }
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to