This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch branch-2.0-var in repository https://gitbox.apache.org/repos/asf/doris.git
commit 757685bbc8dbd3a94b518d1a4a592a533759cdc2 Author: lihangyu <15605149...@163.com> AuthorDate: Sat Sep 2 20:15:18 2023 +0800 [chore](json reader) add original data to error messge for tracing (#22803) --- be/src/vec/exec/format/json/new_json_reader.cpp | 20 +++++++++++++++----- be/src/vec/exec/format/json/new_json_reader.h | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index 108f68f23c..70fc676b45 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -1068,10 +1068,13 @@ Status NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc // prevent from endless loop _next_row = _total_rows + 1; fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "Parse json data for array failed. code: {}, error info: {}", - e.error(), e.what()); + fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(), + e.what()); RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return ""; }, + [&]() -> std::string { + return std::string(_simdjson_ondemand_padding_buffer.data(), + _original_doc_size); + }, [&]() -> std::string { return fmt::to_string(error_msg); }, eof)); _counter->num_rows_filtered++; // Before continuing to process other rows, we need to first clean the fail parsed row. @@ -1167,7 +1170,10 @@ Status NewJsonReader::_simdjson_handle_flat_array_complex_json( fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(), e.what()); RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return ""; }, + [&]() -> std::string { + return std::string(_simdjson_ondemand_padding_buffer.data(), + _original_doc_size); + }, [&]() -> std::string { return fmt::to_string(error_msg); }, eof)); _counter->num_rows_filtered++; // Before continuing to process other rows, we need to first clean the fail parsed row. @@ -1235,7 +1241,10 @@ Status NewJsonReader::_simdjson_handle_nested_complex_json( fmt::format_to(error_msg, "Parse json data failed. code: {}, error info: {}", e.error(), e.what()); RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return ""; }, + [&]() -> std::string { + return std::string(_simdjson_ondemand_padding_buffer.data(), + _original_doc_size); + }, [&]() -> std::string { return fmt::to_string(error_msg); }, eof)); _counter->num_rows_filtered++; // Before continuing to process other rows, we need to first clean the fail parsed row. @@ -1484,6 +1493,7 @@ Status NewJsonReader::_simdjson_parse_json_doc(size_t* size, bool* eof) { *size -= 3; } memcpy(&_simdjson_ondemand_padding_buffer.front(), json_str, *size); + _original_doc_size = *size; auto error = _ondemand_json_parser ->iterate(std::string_view(_simdjson_ondemand_padding_buffer.data(), *size), diff --git a/be/src/vec/exec/format/json/new_json_reader.h b/be/src/vec/exec/format/json/new_json_reader.h index 99651ba055..8a61dc334a 100644 --- a/be/src/vec/exec/format/json/new_json_reader.h +++ b/be/src/vec/exec/format/json/new_json_reader.h @@ -248,6 +248,7 @@ private: // simdjson static constexpr size_t _init_buffer_size = 1024 * 1024 * 8; size_t _padded_size = _init_buffer_size + simdjson::SIMDJSON_PADDING; + size_t _original_doc_size = 0; std::string _simdjson_ondemand_padding_buffer; std::string _simdjson_ondemand_unscape_padding_buffer; // char _simdjson_ondemand_padding_buffer[_padded_size]; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org