This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.1 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 131a0ac56e5cb4c06122b99be63021199f9a72e4 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Fri May 20 09:55:13 2022 +0800 [fix](broker-scan-node) Remove trailing spaces in broker_scanner. Make it consistent with hive and trino behavior. (#9190) Hive and Trino/Presto automatically trim trailing spaces, but Doris does not, so the same query can return different results in Doris than in Hive. Add a new session variable "trim_tailing_spaces_for_external_table_query". If it is set to true, the broker scan node trims the trailing spaces of each column value when reading CSV data. --- be/src/exec/broker_scanner.cpp | 29 ++++++++++++++++------ be/src/runtime/runtime_state.h | 4 +++ .../java/org/apache/doris/qe/SessionVariable.java | 14 +++++++++++ gensrc/thrift/PaloInternalService.thrift | 3 +++ 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index 83b0794aa3..aad3f5deef 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -337,19 +337,20 @@ void BrokerScanner::split_line(const Slice& line) { delete[] ptr; } else { const char* value = line.data; - size_t start = 0; // point to the start pos of next col value. - size_t curpos = 0; // point to the start pos of separator matching sequence. - size_t p1 = 0; // point to the current pos of separator matching sequence. + size_t start = 0; // point to the start pos of next col value. + size_t curpos = 0; // point to the start pos of separator matching sequence. + size_t p1 = 0; // point to the current pos of separator matching sequence. + size_t non_space = 0; // point to the last pos of non_space charactor. 
// Separator: AAAA // - // curpos + // p1 // ▼ // AAAA // 1000AAAA2000AAAA // ▲ ▲ // Start │ - // p1 + // curpos while (curpos < line.size) { if (*(value + curpos + p1) != _value_separator[p1]) { @@ -360,16 +361,30 @@ void BrokerScanner::split_line(const Slice& line) { p1++; if (p1 == _value_separator_length) { // Match a separator - _split_values.emplace_back(value + start, curpos - start); + non_space = curpos; + // Trim tailing spaces. Be consistent with hive and trino's behavior. + if (_state->trim_tailing_spaces_for_external_table_query()) { + while (non_space > start && *(value + non_space - 1) == ' ') { + non_space--; + } + } + _split_values.emplace_back(value + start, non_space - start); start = curpos + _value_separator_length; curpos = start; p1 = 0; + non_space = 0; } } } CHECK(curpos == line.size) << curpos << " vs " << line.size; - _split_values.emplace_back(value + start, curpos - start); + non_space = curpos; + if (_state->trim_tailing_spaces_for_external_table_query()) { + while (non_space > start && *(value + non_space - 1) == ' ') { + non_space--; + } + } + _split_values.emplace_back(value + start, non_space - start); } } diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 449b4c2a17..32f4bb97d4 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -349,6 +349,10 @@ public: bool enable_vectorized_exec() const { return _query_options.enable_vectorized_engine; } + bool trim_tailing_spaces_for_external_table_query() const { + return _query_options.trim_tailing_spaces_for_external_table_query; + } + bool return_object_data_as_binary() const { return _query_options.return_object_data_as_binary; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index e2aa801283..a3ee7f5baf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -180,6 +180,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_PROJECTION = "enable_projection"; + public static final String TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY = "trim_tailing_spaces_for_external_table_query"; + // session origin value public Map<Field, String> sessionOriginValue = new HashMap<Field, String>(); // check stmt is or not [select /*+ SET_VAR(...)*/ ...] @@ -439,6 +441,9 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_PROJECTION) private boolean enableProjection = false; + @VariableMgr.VarAttr(name = TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY, needForward = true) + public boolean trimTailingSpacesForExternalTableQuery = false; + public String getBlockEncryptionMode() { return blockEncryptionMode; } @@ -895,6 +900,14 @@ public class SessionVariable implements Serializable, Writable { return enableProjection; } + public boolean isTrimTailingSpacesForExternalTableQuery() { + return trimTailingSpacesForExternalTableQuery; + } + + public void setTrimTailingSpacesForExternalTableQuery(boolean trimTailingSpacesForExternalTableQuery) { + this.trimTailingSpacesForExternalTableQuery = trimTailingSpacesForExternalTableQuery; + } + // Serialize to thrift object // used for rest api public TQueryOptions toThrift() { @@ -912,6 +925,7 @@ public class SessionVariable implements Serializable, Writable { tResult.setCodegenLevel(codegenLevel); tResult.setEnableVectorizedEngine(enableVectorizedEngine); tResult.setReturnObjectDataAsBinary(returnObjectDataAsBinary); + tResult.setTrimTailingSpacesForExternalTableQuery(trimTailingSpacesForExternalTableQuery); tResult.setBatchSize(batchSize); tResult.setDisableStreamPreaggregations(disableStreamPreaggregations); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 713487f53d..19ae35a64e 
100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -160,6 +160,9 @@ struct TQueryOptions { // show bitmap data in result, if use this in mysql cli may make the terminal // output corrupted character 43: optional bool return_object_data_as_binary = false + + // trim tailing spaces while querying external table and stream load + 44: optional bool trim_tailing_spaces_for_external_table_query = false } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org