This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 5272b06f9cc branch-3.1: [fix](csv reader) trim enclose do not require
trim_double_quotes property #54937 (#55090)
5272b06f9cc is described below
commit 5272b06f9cc04853ba2e27069465d384f73c8388
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Aug 21 16:46:09 2025 +0800
branch-3.1: [fix](csv reader) trim enclose do not require
trim_double_quotes property #54937 (#55090)
Cherry-picked from #54937
Co-authored-by: hui lai <[email protected]>
---
be/src/vec/exec/format/csv/csv_reader.cpp | 3 +--
be/src/vec/exec/format/csv/csv_reader.h | 2 --
.../external_table_p0/tvf/test_local_tvf_enclose.out | Bin 383 -> 224 bytes
.../test_csv_with_enclose_and_escapeS3_load.out | Bin 743 -> 731 bytes
.../stream_load/test_csv_with_enclose_and_escape.out | Bin 863 -> 880 bytes
.../external_table_p0/tvf/test_local_tvf_enclose.groovy | 10 ----------
.../test_csv_with_enclose_and_escapeS3_load.groovy | 10 +++++-----
.../stream_load/test_csv_with_enclose_and_escape.groovy | 5 -----
8 files changed, 6 insertions(+), 24 deletions(-)
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 8eef40f7875..7c60eac0b3b 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -516,7 +516,6 @@ Status CsvReader::_init_options() {
_trim_double_quotes = _params.file_attributes.trim_double_quotes;
}
_options.converted_from_string = _trim_double_quotes;
- _not_trim_enclose = (!_trim_double_quotes && _enclose == '\"');
if (_state != nullptr) {
_keep_cr = _state->query_options().keep_carriage_return;
@@ -573,7 +572,7 @@ Status CsvReader::_create_line_reader() {
col_sep_num, _enclose, _escape, _keep_cr);
_fields_splitter = std::make_unique<EncloseCsvTextFieldSplitter>(
- _trim_tailing_spaces, !_not_trim_enclose,
+ _trim_tailing_spaces, true,
std::static_pointer_cast<EncloseCsvLineReaderCtx>(text_line_reader_ctx),
_value_separator_length, _enclose);
}
diff --git a/be/src/vec/exec/format/csv/csv_reader.h
b/be/src/vec/exec/format/csv/csv_reader.h
index 858e051b690..d5af381f9cb 100644
--- a/be/src/vec/exec/format/csv/csv_reader.h
+++ b/be/src/vec/exec/format/csv/csv_reader.h
@@ -275,8 +275,6 @@ private:
char _enclose = 0;
bool _trim_double_quotes = false;
bool _trim_tailing_spaces = false;
- // `should_not_trim` is to manage the case that: user do not expect to
trim double quotes but enclose is double quotes
- bool _not_trim_enclose = true;
bool _keep_cr = false;
io::IOContext* _io_ctx = nullptr;
diff --git
a/regression-test/data/external_table_p0/tvf/test_local_tvf_enclose.out
b/regression-test/data/external_table_p0/tvf/test_local_tvf_enclose.out
index 6e5d10e4858..ab5a4003369 100644
Binary files
a/regression-test/data/external_table_p0/tvf/test_local_tvf_enclose.out and
b/regression-test/data/external_table_p0/tvf/test_local_tvf_enclose.out differ
diff --git
a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
index 8d4444ac418..31a3dfb21f1 100644
Binary files
a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
and
b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
differ
diff --git
a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
index d4509165cf4..e00ca42ea84 100644
Binary files
a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
and
b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
differ
diff --git
a/regression-test/suites/external_table_p0/tvf/test_local_tvf_enclose.groovy
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_enclose.groovy
index 0dfe231a1d4..ea982c6b375 100644
--- a/regression-test/suites/external_table_p0/tvf/test_local_tvf_enclose.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_local_tvf_enclose.groovy
@@ -46,16 +46,6 @@ suite("test_local_tvf_enclose",
"p0,tvf,external,external_docker") {
"enclose" = "\\\"") order by id;
"""
- qt_enclose_2 """
- select * from local(
- "file_path" = "${filename}",
- "backend_id" = "${be_id}",
- "format" = "csv_with_names",
- "column_separator" = ", ",
- "enclose" = "\\\"",
- "trim_double_quotes" = "true") order by id;
- """
-
// test error case
test {
sql """
diff --git
a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
index c844a2417c6..1a710008011 100644
---
a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
+++
b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
@@ -51,24 +51,24 @@ suite("test_csv_with_enclose_and_escapeS3_load", "load_p0")
{
for (i in 0..<normalCases.size()) {
attributesList.add(new
LoadAttributes("s3://${s3BucketName}/regression/load/data/${normalCases[i]}.csv",
"${tableName}", "LINES TERMINATED BY \"\n\"", "COLUMNS
TERMINATED BY \",\"", "FORMAT AS \"CSV\"", "(k1,k2,v1,v2,v3,v4)",
- "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\",
\"trim_double_quotes\" = \"true\")"))
+ "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\")"))
}
attributesList.add(new
LoadAttributes("s3://${s3BucketName}/regression/load/data/enclose_incomplete.csv",
"${tableName}", "LINES TERMINATED BY \"\n\"", "COLUMNS TERMINATED BY
\",\"", "FORMAT AS \"CSV\"", "(k1,k2,v1,v2,v3,v4)",
- "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\",
\"trim_double_quotes\" = \"true\")").addProperties("max_filter_ratio", "0.5"))
+ "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" =
\"\\\\\")").addProperties("max_filter_ratio", "0.5"))
attributesList.add(new
LoadAttributes("s3://${s3BucketName}/regression/load/data/enclose_without_escape.csv",
"${tableName}", "LINES TERMINATED BY \"\n\"", "COLUMNS TERMINATED BY
\",\"", "FORMAT AS \"CSV\"", "(k1,k2,v1,v2,v3,v4)",
- "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\",
\"trim_double_quotes\" = \"true\")"))
+ "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\")"))
attributesList.add(new
LoadAttributes("s3://${s3BucketName}/regression/load/data/enclose_multi_char_delimiter.csv",
"${tableName}", "LINES TERMINATED BY \"\$\$\$\"", "COLUMNS TERMINATED
BY \"@@\"", "FORMAT AS \"CSV\"", "(k1,k2,v1,v2,v3,v4)",
- "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\",
\"trim_double_quotes\" = \"true\")"))
+ "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\")"))
attributesList.add(new
LoadAttributes("s3://${s3BucketName}/regression/load/data/enclose_not_trim_quotes.csv",
"${tableName}", "", "COLUMNS TERMINATED BY \",\"", "FORMAT AS
\"CSV\"", "(k1,k2,v1,v2,v3,v4)",
- "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" =
\"\\\\\")").addProperties("trim_double_quotes", "false"))
+ "PROPERTIES (\"enclose\" = \"\\\"\", \"escape\" = \"\\\\\")"))
def ak = getS3AK()
def sk = getS3SK()
diff --git
a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
index 5625a7e6de6..d15c702ff4d 100644
---
a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
+++
b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
@@ -47,7 +47,6 @@ suite("test_csv_with_enclose_and_escape", "p0") {
streamLoad {
table "${tableName}"
set 'column_separator', ','
- set 'trim_double_quotes', 'true'
set 'enclose', "\""
set 'escape', '\\'
@@ -58,7 +57,6 @@ suite("test_csv_with_enclose_and_escape", "p0") {
streamLoad {
table "${tableName}"
set 'column_separator', ','
- set 'trim_double_quotes', 'true'
set 'enclose', "\""
set 'escape', '\\'
set 'max_filter_ratio', '0.5'
@@ -76,7 +74,6 @@ suite("test_csv_with_enclose_and_escape", "p0") {
streamLoad {
table "${tableName}"
set 'column_separator', ','
- set 'trim_double_quotes', 'true'
set 'enclose', "\""
set 'escape', '\\'
@@ -94,7 +91,6 @@ suite("test_csv_with_enclose_and_escape", "p0") {
table "${tableName}"
set 'column_separator', '@@'
set 'line_delimiter', '$$$'
- set 'trim_double_quotes', 'true'
set 'enclose', "\""
set 'escape', '\\'
@@ -104,7 +100,6 @@ suite("test_csv_with_enclose_and_escape", "p0") {
streamLoad {
table "${tableName}"
set 'column_separator', ','
- set 'trim_double_quotes', 'false'
set 'enclose', "\""
set 'escape', '\\'
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]