This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 36a70ba1e79e58bd5a56a9d4c14feab572e224ea
Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com>
AuthorDate: Sat Apr 20 17:18:50 2024 +0800

    [Fix](Csv-Reader) Fix the issue of BE core dump caused by improper configuration of column_separator and line_delimiter. (#33693)
---
 be/src/vec/exec/format/csv/csv_reader.cpp          |  8 ++-
 .../data/load_p0/stream_load/special_seperator.csv |  2 +
 .../stream_load/test_csv_special_seperator.out     |  5 ++
 .../stream_load/test_csv_special_seperator.groovy  | 69 ++++++++++++++++++++++
 4 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index c42a465fc63..a10ba8c3d14 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -78,9 +78,11 @@ void EncloseCsvTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>
                            splitted_values);
         value_start_offset = idx + _value_sep_len;
     }
-    // process the last column
-    process_value_func(data, value_start_offset, line.size - value_start_offset, _trimming_char,
-                       splitted_values);
+    if (line.size >= value_start_offset) {
+        // process the last column
+        process_value_func(data, value_start_offset, line.size - value_start_offset, _trimming_char,
+                           splitted_values);
+    }
 }
 
 void PlainCsvTextFieldSplitter::_split_field_single_char(const Slice& line,
diff --git a/regression-test/data/load_p0/stream_load/special_seperator.csv b/regression-test/data/load_p0/stream_load/special_seperator.csv
new file mode 100644
index 00000000000..85cfeba67e9
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/special_seperator.csv
@@ -0,0 +1,2 @@
+1|@|"100115"|@|"5501391"|@|"{\"avgSendTime\":2500,\"backupRecipientPhone\":\"[\\\"11345671255_4561\\\",\\\"14536625234_5370\\\"]\",\"caution\":\"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
 14536625234_8204\\uff0c\\u624b\\u673a\\u53f7 123****1234 
\\u987e\\u5ba2\\u9700\\u8981\\u9910\\u5177\",\"cityId\":351600,\"ctime\":1142353434,\"daySeq\":\"16\",\"deliveryTime\":0,\"detail\":\"[{\\\"actual_price\\\":1.1,\\\"app_food_code\\\":\\\"\\\\u9ec4\\\\u7116\\\\u9e21+\\\\u5343\\\\u5f20+\\\\u706b\\\\
 [...]
+2|@|"100115"|@|"4442066"|@|"{\"avgSendTime\":3636,\"backupRecipientPhone\":\"[\\\"11342355223_6672\\\"]\",\"caution\":\"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
 14536625234_3939\\uff0c\\u624b\\u673a\\u53f7 135****5187 
\\u987e\\u5ba2\\u9700\\u89812\\u4efd\\u9910\\u5177\",\"cityId\":510725,\"ctime\":1124567897,\"daySeq\":\"1\",\"deliveryTime\":0,\"detail\":\"[{\\\"actual_price\\\":16,\\\"app_food_code\\\":\\\"\\\\u4e2d\\\\u9ebb\\\\u6284\\\\u624b\\\\u4e09\\\\u4e24\\\",\\\"attr_counts\\\":
 [...]
diff --git a/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out b/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out
new file mode 100644
index 00000000000..c7dc0937b28
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_csv_special_seperator.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select1 --
+1      100115  5501391 
{"avgSendTime":2500,"backupRecipientPhone":"[\\"11345671255_4561\\",\\"14536625234_5370\\"]","caution":"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
 14536625234_8204\\uff0c\\u624b\\u673a\\u53f7 123****1234 
\\u987e\\u5ba2\\u9700\\u8981\\u9910\\u5177","cityId":351600,"ctime":1142353434,"daySeq":"16","deliveryTime":0,"detail":"[{\\"actual_price\\":1.1,\\"app_food_code\\":\\"\\\\u9ec4\\\\u7116\\\\u9e21+\\\\u5343\\\\u5f20+\\\\u706b\\\\u817f\\\\u80a0+\\\\u7c73\\\\u996d\\",\\"att
 [...]
+2      100115  4442066 
{"avgSendTime":3636,"backupRecipientPhone":"[\\"11342355223_6672\\"]","caution":"\\u6536\\u9910\\u4eba\\u9690\\u79c1\\u53f7
 14536625234_3939\\uff0c\\u624b\\u673a\\u53f7 135****5187 
\\u987e\\u5ba2\\u9700\\u89812\\u4efd\\u9910\\u5177","cityId":510725,"ctime":1124567897,"daySeq":"1","deliveryTime":0,"detail":"[{\\"actual_price\\":16,\\"app_food_code\\":\\"\\\\u4e2d\\\\u9ebb\\\\u6284\\\\u624b\\\\u4e09\\\\u4e24\\",\\"attr_counts\\":\\"\\",\\"attr_names\\":\\"\\",\\"box_num\\"
 [...]
+
diff --git a/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy b/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy
new file mode 100644
index 00000000000..671db175dde
--- /dev/null
+++ b/regression-test/suites/load_p0/stream_load/test_csv_special_seperator.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_csv_special_seperator", "p0") {
+    def tableName = "test_csv_special_seperator"
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE if not exists `${tableName}` (
+            `id` bigint(20) NOT NULL,
+            `developerid` varchar(64) DEFAULT NULL COMMENT '',
+            `epoiid` varchar(64) DEFAULT NULL COMMENT '',
+            `orderjson` string COMMENT '',
+            `addtime` datetime NOT NULL,
+            `syn` tinyint(1) DEFAULT '0' COMMENT '',
+            `shopid` varchar(16) DEFAULT NULL COMMENT '',
+            `shopname` varchar(255) DEFAULT NULL COMMENT '',
+            `orderid` varchar(32) DEFAULT NULL COMMENT '',
+            `orderindex` varchar(16) DEFAULT NULL COMMENT '',
+            `ordervid` varchar(32) DEFAULT NULL COMMENT '',
+            `totalprice` varchar(8) DEFAULT NULL COMMENT '',
+            `sn` string COMMENT '打印机',
+            `printtype` int(1) DEFAULT NULL COMMENT '',
+            `is_print` int(1) DEFAULT '0' COMMENT '',
+            `is_cancel` tinyint(1) DEFAULT '0' COMMENT '',
+            `p_data` string COMMENT '',
+            `c_code` varchar(5) DEFAULT NULL COMMENT '',
+            `c_data` string COMMENT '',
+            `c_confirmtimes` int(2) DEFAULT '0' COMMENT ''
+        ) 
+        ENGINE=OLAP 
+        DUPLICATE KEY(`id`)
+        COMMENT ''
+        DISTRIBUTED BY HASH(`orderid`) BUCKETS 100 
+        PROPERTIES (
+        "replication_num" = "1",
+            "compression" = "ZSTD"
+        );
+    """
+
+    streamLoad {
+        table "${tableName}"
+        set 'column_separator', "|@|"
+        set 'line_delimiter', "|@|\\n"
+        set 'trim_double_quotes', 'true'
+        set 'enclose', "\""
+        set 'escape', '\\'
+        set 'max_filter_ratio', '0'
+
+        file "special_seperator.csv"
+    }
+
+    sql "sync"
+    order_qt_select1 """ SELECT * FROM ${tableName} ORDER BY id;"""
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to