This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e30c3f3a65b [fix](csv_reader)fix bug that Read garbled files caused be crash. (#24164) e30c3f3a65b is described below commit e30c3f3a65bf6eb7887b3128d233376648de3acd Author: daidai <2017501...@qq.com> AuthorDate: Wed Sep 13 14:12:55 2023 +0800 [fix](csv_reader)fix bug that Read garbled files caused be crash. (#24164) fix bug that read garbled files caused be crash. --- be/src/exec/text_converter.cpp | 3 +- .../vec/data_types/serde/data_type_array_serde.cpp | 9 +++- .../vec/data_types/serde/data_type_map_serde.cpp | 11 +++-- .../data_types/serde/data_type_struct_serde.cpp | 5 ++- .../hive/test_text_garbled_file.out | Bin 0 -> 296830 bytes .../hive/test_text_garbled_file.groovy | 46 +++++++++++++++++++++ 6 files changed, 67 insertions(+), 7 deletions(-) diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp index 95232678ea0..59417bc9265 100644 --- a/be/src/exec/text_converter.cpp +++ b/be/src/exec/text_converter.cpp @@ -330,9 +330,10 @@ bool TextConverter::_write_data(const TypeDescriptor& type_desc, kv = i; continue; } - if (i == len || data[i] == _collection_delimiter) { + if ((i == len || data[i] == _collection_delimiter) && i >= kv + 1) { ranges.push_back({from, kv, i - 1}); from = i + 1; + kv = from; } } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 1dca5299894..5aa78a1f886 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -71,7 +71,9 @@ Status DataTypeArraySerDe::deserialize_column_from_json_vector(IColumn& column, Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const { - DCHECK(!slice.empty()); + if (slice.empty()) { + return Status::InvalidArgument("slice is empty!"); + } auto& array_column = assert_cast<ColumnArray&>(column); auto& offsets = array_column.get_offsets(); IColumn& nested_column = array_column.get_data(); @@ -132,6 +134,9 @@ Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level) const { + if (slice.empty()) { + return Status::InvalidArgument("slice is empty!"); + } auto& array_column = assert_cast<ColumnArray&>(column); auto& offsets = array_column.get_offsets(); IColumn& nested_column = array_column.get_data(); @@ -303,4 +308,4 @@ Status DataTypeArraySerDe::write_column_to_mysql(const IColumn& column, } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index a0e6636c507..cce5986b195 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -65,7 +65,9 @@ void DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level) const { - DCHECK(!slice.empty()); + if (slice.empty()) { + return Status::InvalidArgument("slice is empty!"); + } auto& array_column = assert_cast<ColumnMap&>(column); auto& offsets = array_column.get_offsets(); IColumn& nested_key_column = array_column.get_keys(); @@ -92,10 +94,11 @@ Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Sl kv = i; continue; } - if (i == slice.size || slice[i] == collection_delimiter) { + if ((i == slice.size || slice[i] == collection_delimiter) && i >= kv + 1) { key_slices.push_back({slice.data + from, kv - from}); value_slices.push_back({slice.data + kv + 1, i - 1 - kv}); from = i + 1; + kv = from; } } @@ -169,7 +172,9 @@ Status DataTypeMapSerDe::deserialize_column_from_json_vector(IColumn& column, Status DataTypeMapSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const { - DCHECK(!slice.empty()); + if (slice.empty()) { + return Status::InvalidArgument("slice is empty!"); + } auto& array_column = assert_cast<ColumnMap&>(column); auto& offsets = array_column.get_offsets(); IColumn& nested_key_column = array_column.get_keys(); diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index b202d0fb237..06ec4d709bb 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -44,6 +44,9 @@ void DataTypeStructSerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWr Status DataTypeStructSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level) const { + if (slice.empty()) { + return Status::InvalidArgument("slice is empty!"); + } char struct_delimiter = options.get_collection_delimiter(nesting_level); std::vector<Slice> slices; @@ -190,4 +193,4 @@ Status DataTypeStructSerDe::write_column_to_mysql(const IColumn& column, } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/regression-test/data/external_table_p2/hive/test_text_garbled_file.out b/regression-test/data/external_table_p2/hive/test_text_garbled_file.out new file mode 100644 index 00000000000..b003cd49e97 Binary files /dev/null and b/regression-test/data/external_table_p2/hive/test_text_garbled_file.out differ diff --git a/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy b/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy new file mode 100644 index 00000000000..a3ea6a3bcc2 --- /dev/null +++ b/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_text_garbled_file", "p2,external,hive,external_remote,external_remote_hive") { + //test hive garbled files , prevent be hanged + + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "test_text_garbled_file" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + + + order_qt_garbled_file """ + select * from ${catalog_name}.multi_catalog.test_csv_format_error; + """ + + + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org