This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit f9fa2d4f2e8448e6fe2b945a651ef7611c0f27e7
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Sat Dec 10 18:45:47 2022 +0800

    [fix](csv-reader) fix be crash when reading invalid value (#14951)
---
 be/src/exec/text_converter.hpp | 10 ++++++++--
 be/src/vec/core/block.cpp      | 12 ++++++++++++
 be/src/vec/core/block.h        |  2 ++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp
index 45a7a4e570..55aee22d0a 100644
--- a/be/src/exec/text_converter.hpp
+++ b/be/src/exec/text_converter.hpp
@@ -205,6 +205,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         }
     }
 
+    bool insert_after_parse_failure = true;
     StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
     // Parse the raw-text data. Translate the text string to internal format.
     switch (slot_desc->type().type) {
@@ -271,6 +272,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         vectorized::VecDateTimeValue ts_slot;
         if (!ts_slot.from_date_str(data, len)) {
             parse_result = StringParser::PARSE_FAILURE;
+            insert_after_parse_failure = false;
             break;
         }
         ts_slot.cast_to_date();
@@ -283,6 +285,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         vectorized::VecDateTimeValue ts_slot;
         if (!ts_slot.from_date_str(data, len)) {
             parse_result = StringParser::PARSE_FAILURE;
+            insert_after_parse_failure = false;
             break;
         }
         ts_slot.to_datetime();
@@ -295,6 +298,7 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
         DecimalV2Value decimal_slot;
         if (decimal_slot.parse_from_str(data, len)) {
             parse_result = StringParser::PARSE_FAILURE;
+            insert_after_parse_failure = false;
             break;
         }
         reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(
@@ -308,12 +312,14 @@ inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
     }
 
     if (UNLIKELY(parse_result == StringParser::PARSE_FAILURE)) {
-        if (true == slot_desc->is_nullable()) {
+        if (slot_desc->is_nullable()) {
             auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
             size_t size = nullable_column->get_null_map_data().size();
             doris::vectorized::NullMap& null_map_data = nullable_column->get_null_map_data();
             null_map_data[size - 1] = 1;
-            nullable_column->get_nested_column().insert_default();
+            if (!insert_after_parse_failure) {
+                nullable_column->get_nested_column().insert_default();
+            }
         }
         return false;
     }
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 3cdadd5bdc..6e74ac1e95 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -320,6 +320,18 @@ size_t Block::rows() const {
     return 0;
 }
 
+std::string Block::each_col_size() {
+    std::stringstream ss;
+    for (const auto& elem : data) {
+        if (elem.column) {
+            ss << elem.column->size() << " | ";
+        } else {
+            ss << "-1 | ";
+        }
+    }
+    return ss.str();
+}
+
 void Block::set_num_rows(size_t length) {
     if (rows() > length) {
         for (auto& elem : data) {
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 93d9cabaab..6b7cc9d5a1 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -180,6 +180,8 @@ public:
     /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
     size_t rows() const;
 
+    std::string each_col_size();
+
     // Cut the rows in block, use in LIMIT operation
     void set_num_rows(size_t length);
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org