This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 5fa6e892be [fix](broker-scan-node) Remove trailing spaces in broker_scanner. Make it consistent with hive and trino behavior. (#9190) 5fa6e892be is described below commit 5fa6e892beb8b51ff18b8a183fd5c92b568f5eae Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Fri May 20 09:55:13 2022 +0800 [fix](broker-scan-node) Remove trailing spaces in broker_scanner. Make it consistent with hive and trino behavior. (#9190) Hive and Trino/Presto automatically trim trailing spaces from column values, but Doris does not. As a result, the same query can return different results in Doris than in Hive. Add a new session variable "trim_tailing_spaces_for_external_table_query". If it is set to true, the broker scan node trims the trailing spaces of each column value when reading CSV. --- be/src/exec/broker_scanner.cpp | 29 ++++++++++++++++------ be/src/runtime/runtime_state.h | 4 +++ .../java/org/apache/doris/qe/SessionVariable.java | 14 +++++++++++ gensrc/thrift/PaloInternalService.thrift | 3 +++ 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index c394424092..d9453fecf0 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -339,19 +339,20 @@ void BrokerScanner::split_line(const Slice& line) { delete[] ptr; } else { const char* value = line.data; - size_t start = 0; // point to the start pos of next col value. - size_t curpos = 0; // point to the start pos of separator matching sequence. - size_t p1 = 0; // point to the current pos of separator matching sequence. + size_t start = 0; // point to the start pos of next col value. + size_t curpos = 0; // point to the start pos of separator matching sequence. + size_t p1 = 0; // point to the current pos of separator matching sequence. + size_t non_space = 0; // point to the last pos of non_space charactor. 
// Separator: AAAA // - // curpos + // p1 // ▼ // AAAA // 1000AAAA2000AAAA // ▲ ▲ // Start │ - // p1 + // curpos while (curpos < line.size) { if (*(value + curpos + p1) != _value_separator[p1]) { @@ -362,16 +363,30 @@ void BrokerScanner::split_line(const Slice& line) { p1++; if (p1 == _value_separator_length) { // Match a separator - _split_values.emplace_back(value + start, curpos - start); + non_space = curpos; + // Trim tailing spaces. Be consistent with hive and trino's behavior. + if (_state->trim_tailing_spaces_for_external_table_query()) { + while (non_space > start && *(value + non_space - 1) == ' ') { + non_space--; + } + } + _split_values.emplace_back(value + start, non_space - start); start = curpos + _value_separator_length; curpos = start; p1 = 0; + non_space = 0; } } } CHECK(curpos == line.size) << curpos << " vs " << line.size; - _split_values.emplace_back(value + start, curpos - start); + non_space = curpos; + if (_state->trim_tailing_spaces_for_external_table_query()) { + while (non_space > start && *(value + non_space - 1) == ' ') { + non_space--; + } + } + _split_values.emplace_back(value + start, non_space - start); } } diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 8f5c0b86a1..eed32d8b82 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -326,6 +326,10 @@ public: bool enable_vectorized_exec() const { return _query_options.enable_vectorized_engine; } + bool trim_tailing_spaces_for_external_table_query() const { + return _query_options.trim_tailing_spaces_for_external_table_query; + } + bool return_object_data_as_binary() const { return _query_options.return_object_data_as_binary; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index aad54ecf75..ce9e9a40d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -180,6 +180,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_PROJECTION = "enable_projection"; + public static final String TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY = "trim_tailing_spaces_for_external_table_query"; + // session origin value public Map<Field, String> sessionOriginValue = new HashMap<Field, String>(); // check stmt is or not [select /*+ SET_VAR(...)*/ ...] @@ -439,6 +441,9 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_PROJECTION) private boolean enableProjection = true; + @VariableMgr.VarAttr(name = TRIM_TAILING_SPACES_FOR_EXTERNAL_TABLE_QUERY, needForward = true) + public boolean trimTailingSpacesForExternalTableQuery = false; + public String getBlockEncryptionMode() { return blockEncryptionMode; } @@ -895,6 +900,14 @@ public class SessionVariable implements Serializable, Writable { return enableProjection; } + public boolean isTrimTailingSpacesForExternalTableQuery() { + return trimTailingSpacesForExternalTableQuery; + } + + public void setTrimTailingSpacesForExternalTableQuery(boolean trimTailingSpacesForExternalTableQuery) { + this.trimTailingSpacesForExternalTableQuery = trimTailingSpacesForExternalTableQuery; + } + // Serialize to thrift object // used for rest api public TQueryOptions toThrift() { @@ -912,6 +925,7 @@ public class SessionVariable implements Serializable, Writable { tResult.setCodegenLevel(codegenLevel); tResult.setEnableVectorizedEngine(enableVectorizedEngine); tResult.setReturnObjectDataAsBinary(returnObjectDataAsBinary); + tResult.setTrimTailingSpacesForExternalTableQuery(trimTailingSpacesForExternalTableQuery); tResult.setBatchSize(batchSize); tResult.setDisableStreamPreaggregations(disableStreamPreaggregations); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index a67a43d2d4..4787513baf 
100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -160,6 +160,9 @@ struct TQueryOptions { // show bitmap data in result, if use this in mysql cli may make the terminal // output corrupted character 43: optional bool return_object_data_as_binary = false + + // trim tailing spaces while querying external table and stream load + 44: optional bool trim_tailing_spaces_for_external_table_query = false } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org