This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 36a70ba1e79e58bd5a56a9d4c14feab572e224ea Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com> AuthorDate: Sat Apr 20 17:18:50 2024 +0800 [Fix](Csv-Reader)Fix the issue of BE core dump caused by improper configuration of column_seperator and line_delimiter. (#33693) --- be/src/vec/exec/format/csv/csv_reader.cpp | 8 ++- .../data/load_p0/stream_load/special_seperator.csv | 2 + .../stream_load/test_csv_special_seperator.out | 5 ++ .../stream_load/test_csv_special_seperator.groovy | 69 ++++++++++++++++++++++ 4 files changed, 81 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index c42a465fc63..a10ba8c3d14 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -78,9 +78,11 @@ void EncloseCsvTextFieldSplitter::do_split(const Slice& line, std::vector<Slice> splitted_values); value_start_offset = idx + _value_sep_len; } - // process the last column - process_value_func(data, value_start_offset, line.size - value_start_offset, _trimming_char, - splitted_values); + if (line.size >= value_start_offset) { + // process the last column + process_value_func(data, value_start_offset, line.size - value_start_offset, _trimming_char, + splitted_values); + } } void PlainCsvTextFieldSplitter::_split_field_single_char(const Slice& line, diff --git a/regression-test/data/load_p0/stream_load/special_seperator.csv b/regression-test/data/load_p0/stream_load/special_seperator.csv new file mode 100644 index 00000000000..85cfeba67e9 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/special_seperator.csv @@ -0,0 +1,2 @@ +1|@|"100115"|@|"5501391"|@|"{\"avgSendTime\":2500,\"backupRecipientPhone\":\"[\\\"11345671255_4561\\\",\\\"14536625234_5370\\\"]\",\"caution\":\"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7 14536625234_8204\\uff0c\\u624b\\u673a\\u53f7 123****1234 \\u987e\\u5ba2\\u9700\\u8981\\u9910\\u5177\",\"cityId\":351600,\"ctime\":1142353434,\"daySeq\":\"16\",\"deliveryTime\":0,\"detail\":\"[{\\\"actual_price\\\":1.1,\\\"app_food_code\\\":\\\"\\\\u9ec4\\\\u7116\\\\u9e21+\\\\u5343\\\\u5f20+\\\\u706b\\\\ [...] +2|@|"100115"|@|"4442066"|@|"{\"avgSendTime\":3636,\"backupRecipientPhone\":\"[\\\"11342355223_6672\\\"]\",\"caution\":\"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7 14536625234_3939\\uff0c\\u624b\\u673a\\u53f7 135****5187 \\u987e\\u5ba2\\u9700\\u89812\\u4efd\\u9910\\u5177\",\"cityId\":510725,\"ctime\":1124567897,\"daySeq\":\"1\",\"deliveryTime\":0,\"detail\":\"[{\\\"actual_price\\\":16,\\\"app_food_code\\\":\\\"\\\\u4e2d\\\\u9ebb\\\\u6284\\\\u624b\\\\u4e09\\\\u4e24\\\",\\\"attr_counts\\\": [...] diff --git a/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out b/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out new file mode 100644 index 00000000000..c7dc0937b28 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select1 -- +1 100115 5501391 {"avgSendTime":2500,"backupRecipientPhone":"[\\"11345671255_4561\\",\\"14536625234_5370\\"]","caution":"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7 14536625234_8204\\uff0c\\u624b\\u673a\\u53f7 123****1234 \\u987e\\u5ba2\\u9700\\u8981\\u9910\\u5177","cityId":351600,"ctime":1142353434,"daySeq":"16","deliveryTime":0,"detail":"[{\\"actual_price\\":1.1,\\"app_food_code\\":\\"\\\\u9ec4\\\\u7116\\\\u9e21+\\\\u5343\\\\u5f20+\\\\u706b\\\\u817f\\\\u80a0+\\\\u7c73\\\\u996d\\",\\"att [...] +2 100115 4442066 {"avgSendTime":3636,"backupRecipientPhone":"[\\"11342355223_6672\\"]","caution":"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7 14536625234_3939\\uff0c\\u624b\\u673a\\u53f7 135****5187 \\u987e\\u5ba2\\u9700\\u89812\\u4efd\\u9910\\u5177","cityId":510725,"ctime":1124567897,"daySeq":"1","deliveryTime":0,"detail":"[{\\"actual_price\\":16,\\"app_food_code\\":\\"\\\\u4e2d\\\\u9ebb\\\\u6284\\\\u624b\\\\u4e09\\\\u4e24\\",\\"attr_counts\\":\\"\\",\\"attr_names\\":\\"\\",\\"box_num\\" [...] + diff --git a/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy b/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy new file mode 100644 index 00000000000..671db175dde --- /dev/null +++ b/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_csv_special_seperator", "p0") { + def tableName = "test_csv_special_seperator" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE if not exists `${tableName}` ( + `id` bigint(20) NOT NULL, + `developerid` varchar(64) DEFAULT NULL COMMENT '', + `epoiid` varchar(64) DEFAULT NULL COMMENT '', + `orderjson` string COMMENT '', + `addtime` datetime NOT NULL, + `syn` tinyint(1) DEFAULT '0' COMMENT '', + `shopid` varchar(16) DEFAULT NULL COMMENT '', + `shopname` varchar(255) DEFAULT NULL COMMENT '', + `orderid` varchar(32) DEFAULT NULL COMMENT '', + `orderindex` varchar(16) DEFAULT NULL COMMENT '', + `ordervid` varchar(32) DEFAULT NULL COMMENT '', + `totalprice` varchar(8) DEFAULT NULL COMMENT '', + `sn` string COMMENT '打印机', + `printtype` int(1) DEFAULT NULL COMMENT '', + `is_print` int(1) DEFAULT '0' COMMENT '', + `is_cancel` tinyint(1) DEFAULT '0' COMMENT '', + `p_data` string COMMENT '', + `c_code` varchar(5) DEFAULT NULL COMMENT '', + `c_data` string COMMENT '', + `c_confirmtimes` int(2) DEFAULT '0' COMMENT '' + ) + ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT '' + DISTRIBUTED BY HASH(`orderid`) BUCKETS 100 + PROPERTIES ( + "replication_num" = "1", + "compression" = "ZSTD" + ); + """ + + streamLoad { + table "${tableName}" + set 'column_separator', "|@|" + set 'line_delimiter', "|@|\\n" + set 'trim_double_quotes', 'true' + set 'enclose', "\"" + set 'escape', '\\' + set 'max_filter_ratio', '0' + + file "special_seperator.csv" + } + + sql "sync" + order_qt_select1 """ SELECT * FROM ${tableName} ORDER BY id;""" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org