This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-2.0-var
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 757685bbc8dbd3a94b518d1a4a592a533759cdc2
Author: lihangyu <15605149...@163.com>
AuthorDate: Sat Sep 2 20:15:18 2023 +0800

    [chore](json reader) add original data to error message for tracing (#22803)
---
 be/src/vec/exec/format/json/new_json_reader.cpp | 20 +++++++++++++++-----
 be/src/vec/exec/format/json/new_json_reader.h   |  1 +
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index 108f68f23c..70fc676b45 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1068,10 +1068,13 @@ Status 
NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc
             // prevent from endless loop
             _next_row = _total_rows + 1;
             fmt::memory_buffer error_msg;
-            fmt::format_to(error_msg, "Parse json data for array failed. code: 
{}, error info: {}",
-                           e.error(), e.what());
+            fmt::format_to(error_msg, "Parse json data failed. code: {}, error 
info: {}", e.error(),
+                           e.what());
             RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                    [&]() -> std::string { return ""; },
+                    [&]() -> std::string {
+                        return 
std::string(_simdjson_ondemand_padding_buffer.data(),
+                                           _original_doc_size);
+                    },
                     [&]() -> std::string { return fmt::to_string(error_msg); 
}, eof));
             _counter->num_rows_filtered++;
             // Before continuing to process other rows, we need to first clean 
the fail parsed row.
@@ -1167,7 +1170,10 @@ Status 
NewJsonReader::_simdjson_handle_flat_array_complex_json(
             fmt::format_to(error_msg, "Parse json data failed. code: {}, error 
info: {}", e.error(),
                            e.what());
             RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                    [&]() -> std::string { return ""; },
+                    [&]() -> std::string {
+                        return 
std::string(_simdjson_ondemand_padding_buffer.data(),
+                                           _original_doc_size);
+                    },
                     [&]() -> std::string { return fmt::to_string(error_msg); 
}, eof));
             _counter->num_rows_filtered++;
             // Before continuing to process other rows, we need to first clean 
the fail parsed row.
@@ -1235,7 +1241,10 @@ Status 
NewJsonReader::_simdjson_handle_nested_complex_json(
             fmt::format_to(error_msg, "Parse json data failed. code: {}, error 
info: {}", e.error(),
                            e.what());
             RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                    [&]() -> std::string { return ""; },
+                    [&]() -> std::string {
+                        return 
std::string(_simdjson_ondemand_padding_buffer.data(),
+                                           _original_doc_size);
+                    },
                     [&]() -> std::string { return fmt::to_string(error_msg); 
}, eof));
             _counter->num_rows_filtered++;
             // Before continuing to process other rows, we need to first clean 
the fail parsed row.
@@ -1484,6 +1493,7 @@ Status NewJsonReader::_simdjson_parse_json_doc(size_t* 
size, bool* eof) {
         *size -= 3;
     }
     memcpy(&_simdjson_ondemand_padding_buffer.front(), json_str, *size);
+    _original_doc_size = *size;
     auto error =
             _ondemand_json_parser
                     
->iterate(std::string_view(_simdjson_ondemand_padding_buffer.data(), *size),
diff --git a/be/src/vec/exec/format/json/new_json_reader.h b/be/src/vec/exec/format/json/new_json_reader.h
index 99651ba055..8a61dc334a 100644
--- a/be/src/vec/exec/format/json/new_json_reader.h
+++ b/be/src/vec/exec/format/json/new_json_reader.h
@@ -248,6 +248,7 @@ private:
     // simdjson
     static constexpr size_t _init_buffer_size = 1024 * 1024 * 8;
     size_t _padded_size = _init_buffer_size + simdjson::SIMDJSON_PADDING;
+    size_t _original_doc_size = 0;
     std::string _simdjson_ondemand_padding_buffer;
     std::string _simdjson_ondemand_unscape_padding_buffer;
     // char _simdjson_ondemand_padding_buffer[_padded_size];


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to