This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4a9708d2066613870ce71e155a9c95168a0b41af
Author: YueW <45946325+tany...@users.noreply.github.com>
AuthorDate: Wed Oct 18 16:12:22 2023 +0800

    [opt](error msg) Make data codec error clearly when load csv data can't display (#25540)
    
    
    Co-authored-by: Tanya-W <tanya1218w@163.com>
---
 be/src/vec/exec/format/csv/csv_reader.cpp          |  8 ++-
 .../stream_load/csv_with_none_utf8_data.csv        |  4 ++
 .../test_csv_with_none_utf8_data.groovy            | 73 ++++++++++++++++++++++
 3 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index c997afe6a31..70a691f5d66 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -703,10 +703,12 @@ Status CsvReader::_validate_line(const Slice& line, bool* success) {
             return Status::InternalError("Only support csv data in utf8 codec");
         } else {
             RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                    []() -> std::string { return "Unable to display"; },
-                    []() -> std::string {
+                    [&]() -> std::string { return std::string(line.data, line.size); },
+                    [&]() -> std::string {
                         fmt::memory_buffer error_msg;
-                        fmt::format_to(error_msg, "{}", "Unable to display");
+                        fmt::format_to(error_msg, "{}{}",
+                                       "Unable to display, only support csv data in utf8 codec",
+                                       ", please check the data encoding");
                         return fmt::to_string(error_msg);
                     },
                     &_line_reader_eof));
diff --git a/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv b/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv
new file mode 100644
index 00000000000..86d326d0c62
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv
@@ -0,0 +1,4 @@
+123abc2022-12-012022-12-01:09:30:31
+233��ǰ���¹⣬���ǵ���˪2022-12-012022-12-01:09:30:31
+343efg2022-12-012022-12-01:09:30:31
+453��� ��̫��2022-12-012022-12-01:09:30:31
diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy b/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy
new file mode 100644
index 00000000000..bca699f7433
--- /dev/null
+++ b/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy
@@ -0,0 +1,73 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_csv_with_none_utf8_data", "p0") {
+    def tableName = "test_csv_with_none_utf8_data"
+
+    // create table
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName} (
+            `k1` int(20) NULL,
+            `k2` bigint(20) NULL,
+            `v1` tinyint(4)  NULL,
+            `v2` text  NULL,
+            `v3` date  NULL,
+            `v4` datetime  NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k1`, `k2`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 3
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1");
+    """
+
+    streamLoad {
+        table "${tableName}"
+
+        set 'column_separator', '\\x01'
+
+        file 'csv_with_none_utf8_data.csv'
+
+        // By default the stream load action checks the result: Success status and NumberTotalRows == NumberLoadedRows.
+        
+        // If a check callback is declared, the default check conditions are ignored,
+        // so you must check all conditions yourself.
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            def (code, out, err) = curl("GET", json.ErrorURL)
+            log.info("error result: " + out)
+            def checkError = out.contains("Unable to display, only support csv data in utf8 codec")
+            assertTrue(checkError)
+            assertEquals("fail", json.Status.toLowerCase())
+            assertTrue(json.Message.contains("too many filtered rows"))
+            assertEquals(4, json.NumberTotalRows)
+            assertEquals(2, json.NumberLoadedRows)
+            assertEquals(2, json.NumberFilteredRows)
+            assertTrue(json.LoadBytes > 0)
+            log.info("url: " + json.ErrorURL)
+        }
+    }
+
+
+    // drop table
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to