This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4a9708d2066613870ce71e155a9c95168a0b41af Author: YueW <45946325+tany...@users.noreply.github.com> AuthorDate: Wed Oct 18 16:12:22 2023 +0800 [opt](error msg) Make data codec error clearly when load csv data can't display (#25540) Co-authored-by: Tanya-W <tanya1218w@163,com> --- be/src/vec/exec/format/csv/csv_reader.cpp | 8 ++- .../stream_load/csv_with_none_utf8_data.csv | 4 ++ .../test_csv_with_none_utf8_data.groovy | 73 ++++++++++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index c997afe6a31..70a691f5d66 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -703,10 +703,12 @@ Status CsvReader::_validate_line(const Slice& line, bool* success) { return Status::InternalError("Only support csv data in utf8 codec"); } else { RETURN_IF_ERROR(_state->append_error_msg_to_file( - []() -> std::string { return "Unable to display"; }, - []() -> std::string { + [&]() -> std::string { return std::string(line.data, line.size); }, + [&]() -> std::string { fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", "Unable to display"); + fmt::format_to(error_msg, "{}{}", + "Unable to display, only support csv data in utf8 codec", + ", please check the data encoding"); return fmt::to_string(error_msg); }, &_line_reader_eof)); diff --git a/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv b/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv new file mode 100644 index 00000000000..86d326d0c62 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/csv_with_none_utf8_data.csv @@ -0,0 +1,4 @@ +123abc2022-12-012022-12-01:09:30:31 +233��ǰ���¹⣬���ǵ���˪2022-12-012022-12-01:09:30:31 +343efg2022-12-012022-12-01:09:30:31 +453��� ��̫��2022-12-012022-12-01:09:30:31 diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy 
b/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy new file mode 100644 index 00000000000..bca699f7433 --- /dev/null +++ b/regression-test/suites/load_p0/stream_load/test_csv_with_none_utf8_data.groovy @@ -0,0 +1,73 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_csv_with_none_utf8_data", "p0") { + def tableName = "test_csv_with_none_utf8_data" + + // create table + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k1` int(20) NULL, + `k2` bigint(20) NULL, + `v1` tinyint(4) NULL, + `v2` text NULL, + `v3` date NULL, + `v4` datetime NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`, `k2`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 3 + PROPERTIES ("replication_allocation" = "tag.location.default: 1"); + """ + + streamLoad { + table "${tableName}" + + set 'column_separator', '\\x01' + + file 'csv_with_none_utf8_data.csv' + + // stream load action will check result, include Success status, and NumberTotalRows == NumberLoadedRows + + // if declared a check callback, the default check condition will ignore. 
+ // So you must check all conditions + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + def (code, out, err) = curl("GET", json.ErrorURL) + log.info("error result: " + out) + def checkError = out.contains("Unable to display, only support csv data in utf8 codec") + assertTrue(checkError) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("too many filtered rows")) + assertEquals(4, json.NumberTotalRows) + assertEquals(2, json.NumberLoadedRows) + assertEquals(2, json.NumberFilteredRows) + assertTrue(json.LoadBytes > 0) + log.info("url: " + json.ErrorURL) + } + } + + + // drop table + sql """ DROP TABLE IF EXISTS ${tableName} """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org