This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b515f86669b [fix](fold) fixed an issue with be computing constants (#43410) b515f86669b is described below commit b515f86669bc118cc03cf9f8332b6397194dc337 Author: lw112 <131352377+felixw...@users.noreply.github.com> AuthorDate: Fri Dec 20 10:38:03 2024 +0800 [fix](fold) fixed an issue with be computing constants (#43410) ### What problem does this PR solve? issue close: #43061 1、Problem: When enable_fold_constant_by_be=true is set, the results of the two queries below are inconsistent: select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')) +----------------------------------------------+ | hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')) | +----------------------------------------------+ | C2BD89103557CCBF7ED97B51860225A0 | +----------------------------------------------+ select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t +--------------------------------------------------+ | hex(s) | +--------------------------------------------------+ | C2BDEFBFBD103557CCBF7EEFBFBD7B51EFBFBD0225EFBFBD | +--------------------------------------------------+ 2、MySQL results: select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t; +----------------------------------+ | hex(s) | +----------------------------------+ | C2BD89103557CCBF7ED97B51860225A0 | +----------------------------------+ 3、Cause: When processing binary data such as the output of FromBase64, BE returns the original binary data through the bytesValue field, but the previous code only used the stringValue field, so the binary data was corrupted during the string encoding conversion process --- .../nereids/rules/expression/rules/FoldConstantRuleOnBE.java | 12 ++++++++++-- .../apache/doris/nereids/trees/expressions/LiteralTest.java | 8 ++++++-- .../expression/fold_constant/fold_constant_by_be.out | 6 ++++++ .../expression/fold_constant/fold_constant_by_be.groovy | 11 +++++++---- 4 files changed, 29 insertions(+), 8 
deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java index 70e63b050a8..dd79de70e26 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java @@ -487,8 +487,16 @@ public class FoldConstantRuleOnBE implements ExpressionPatternRuleFactory { } else if (type.isStringLikeType()) { int num = resultContent.getStringValueCount(); for (int i = 0; i < num; ++i) { - Literal literal = new StringLiteral(resultContent.getStringValue(i)); - res.add(literal); + // get the raw byte data to avoid character encoding conversion problems + ByteString bytesValues = resultContent.getBytesValue(i); + // use UTF-8 encoding to ensure proper handling of binary data + String stringValue = bytesValues.toStringUtf8(); + // handle special NULL value cases + if ("\\N".equalsIgnoreCase(stringValue) && resultContent.hasHasNull()) { + res.add(new NullLiteral(type)); + } else { + res.add(new StringLiteral(stringValue)); + } } } else if (type.isArrayType()) { ArrayType arrayType = (ArrayType) type; diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java index fcb64ff0bfa..9c7e2e5b151 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java @@ -233,7 +233,9 @@ class LiteralTest { PValues.Builder resultContentBuilder = PValues.newBuilder(); for (int i = 0; i < elementsArray.length; i = i + 2) { childBuilder1.addInt32Value(elementsArray[i]); - childBuilder2.addStringValue("str" + (i + 1)); + String strValue = "str" + (i + 
1); + childBuilder2.addStringValue(strValue); + childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue)); } childBuilder1.setType(childTypeBuilder1.build()); childBuilder2.setType(childTypeBuilder2.build()); @@ -280,7 +282,9 @@ class LiteralTest { PValues.Builder resultContentBuilder = PValues.newBuilder(); for (int i = 0; i < elementsArray.length; i = i + 2) { childBuilder1.addInt32Value(elementsArray[i]); - childBuilder2.addStringValue("str" + (i + 1)); + String strValue = "str" + (i + 1); + childBuilder2.addStringValue(strValue); + childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue)); } childBuilder1.setType(childTypeBuilder1.build()); childBuilder2.setType(childTypeBuilder2.build()); diff --git a/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out b/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out index c7c506292a5..8d9d704684e 100644 --- a/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out +++ b/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out @@ -1,4 +1,10 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +C2BD89103557CCBF7ED97B51860225A0 + +-- !sql -- +C2BD89103557CCBF7ED97B51860225A0 + -- !sql_1 -- 80000 diff --git a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy index 09a80209c04..f3b1b0cdcd5 100644 --- a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy +++ b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy @@ -22,6 +22,9 @@ suite("fold_constant_by_be") { sql 'set enable_fallback_to_original_planner=false' sql 'set enable_fold_constant_by_be=true' + qt_sql """ select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')); """ + qt_sql """ select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t; """ + test { sql ''' select if( @@ -32,8 +35,8 @@ suite("fold_constant_by_be") { result([['9999-07-31']]) } - sql """ - CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING NULL) + sql """ + CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING NULL) UNIQUE KEY(k1) DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1"); """ @@ -53,7 +56,7 @@ suite("fold_constant_by_be") { sql 'set query_timeout=12;' qt_sql "select sleep(sign(1)*5);" - + explain { sql("verbose select substring('123456', 1, 3)") contains "varchar(3)" @@ -71,7 +74,7 @@ suite("fold_constant_by_be") { col_varchar_1000__undef_signed varchar(1000) null , col_varchar_1000__undef_signed_not_null varchar(1000) not null , col_varchar_1001__undef_signed varchar(1001) null , - col_varchar_1001__undef_signed_not_null varchar(1001) not null + col_varchar_1001__undef_signed_not_null varchar(1001) not null ) engine=olap DUPLICATE KEY(pk, col_char_255__undef_signed, col_char_100__undef_signed) distributed by hash(pk) buckets 10 --------------------------------------------------------------------- To unsubscribe, 
e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org