This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b515f86669b [fix](fold) fixed an issue with be computing constants 
(#43410)
b515f86669b is described below

commit b515f86669bc118cc03cf9f8332b6397194dc337
Author: lw112 <131352377+felixw...@users.noreply.github.com>
AuthorDate: Fri Dec 20 10:38:03 2024 +0800

    [fix](fold) fixed an issue with be computing constants (#43410)
    
    ### What problem does this PR solve?
    
    issue close: #43061
    
    1、Problem
    When enable_fold_constant_by_be=true is set, the results of the
    two queries below are inconsistent:
    
    select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA=='))
    +----------------------------------------------+
    | hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')) |
    +----------------------------------------------+
    | C2BD89103557CCBF7ED97B51860225A0             |
    +----------------------------------------------+
    
    select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t
    +--------------------------------------------------+
    | hex(s)                                           |
    +--------------------------------------------------+
    | C2BDEFBFBD103557CCBF7EEFBFBD7B51EFBFBD0225EFBFBD |
    +--------------------------------------------------+
    
    2、MySQL results
    
    select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t;
    +----------------------------------+
    | hex(s)                           |
    +----------------------------------+
    | C2BD89103557CCBF7ED97B51860225A0 |
    +----------------------------------+
    
    3、Cause
    When processing binary data such as the output of FromBase64, BE
    returns the original binary data through the bytesValue field, but the
    previous code only used the stringValue field, so the binary data was
    corrupted during the string encoding conversion.
---
 .../nereids/rules/expression/rules/FoldConstantRuleOnBE.java | 12 ++++++++++--
 .../apache/doris/nereids/trees/expressions/LiteralTest.java  |  8 ++++++--
 .../expression/fold_constant/fold_constant_by_be.out         |  6 ++++++
 .../expression/fold_constant/fold_constant_by_be.groovy      | 11 +++++++----
 4 files changed, 29 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
index 70e63b050a8..dd79de70e26 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
@@ -487,8 +487,16 @@ public class FoldConstantRuleOnBE implements 
ExpressionPatternRuleFactory {
         } else if (type.isStringLikeType()) {
             int num = resultContent.getStringValueCount();
             for (int i = 0; i < num; ++i) {
-                Literal literal = new 
StringLiteral(resultContent.getStringValue(i));
-                res.add(literal);
+                // get the raw byte data to avoid character encoding 
conversion problems
+                ByteString bytesValues = resultContent.getBytesValue(i);
+                // use UTF-8 encoding to ensure proper handling of binary data
+                String stringValue = bytesValues.toStringUtf8();
+                // handle special NULL value cases
+                if ("\\N".equalsIgnoreCase(stringValue) && 
resultContent.hasHasNull()) {
+                    res.add(new NullLiteral(type));
+                } else {
+                    res.add(new StringLiteral(stringValue));
+                }
             }
         } else if (type.isArrayType()) {
             ArrayType arrayType = (ArrayType) type;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java
index fcb64ff0bfa..9c7e2e5b151 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/LiteralTest.java
@@ -233,7 +233,9 @@ class LiteralTest {
         PValues.Builder resultContentBuilder = PValues.newBuilder();
         for (int i = 0; i < elementsArray.length; i = i + 2) {
             childBuilder1.addInt32Value(elementsArray[i]);
-            childBuilder2.addStringValue("str" + (i + 1));
+            String strValue = "str" + (i + 1);
+            childBuilder2.addStringValue(strValue);
+            
childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue));
         }
         childBuilder1.setType(childTypeBuilder1.build());
         childBuilder2.setType(childTypeBuilder2.build());
@@ -280,7 +282,9 @@ class LiteralTest {
         PValues.Builder resultContentBuilder = PValues.newBuilder();
         for (int i = 0; i < elementsArray.length; i = i + 2) {
             childBuilder1.addInt32Value(elementsArray[i]);
-            childBuilder2.addStringValue("str" + (i + 1));
+            String strValue = "str" + (i + 1);
+            childBuilder2.addStringValue(strValue);
+            
childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue));
         }
         childBuilder1.setType(childTypeBuilder1.build());
         childBuilder2.setType(childTypeBuilder2.build());
diff --git 
a/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out
 
b/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out
index c7c506292a5..8d9d704684e 100644
--- 
a/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out
+++ 
b/regression-test/data/nereids_p0/expression/fold_constant/fold_constant_by_be.out
@@ -1,4 +1,10 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+C2BD89103557CCBF7ED97B51860225A0
+
+-- !sql --
+C2BD89103557CCBF7ED97B51860225A0
+
 -- !sql_1 --
 80000
 
diff --git 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy
 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy
index 09a80209c04..f3b1b0cdcd5 100644
--- 
a/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy
+++ 
b/regression-test/suites/nereids_p0/expression/fold_constant/fold_constant_by_be.groovy
@@ -22,6 +22,9 @@ suite("fold_constant_by_be") {
     sql 'set enable_fallback_to_original_planner=false'
     sql 'set enable_fold_constant_by_be=true'
 
+    qt_sql """ select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')); """
+    qt_sql """ select hex(s) from (select 
from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t; """
+
     test {
         sql '''
             select if(
@@ -32,8 +35,8 @@ suite("fold_constant_by_be") {
         result([['9999-07-31']])
     }
 
-    sql """ 
-        CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING 
NULL) 
+    sql """
+        CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING NULL)
         UNIQUE KEY(k1) DISTRIBUTED BY HASH(k1) BUCKETS 5 
properties("replication_num" = "1");
     """
 
@@ -53,7 +56,7 @@ suite("fold_constant_by_be") {
 
     sql 'set query_timeout=12;'
     qt_sql "select sleep(sign(1)*5);"
-    
+
     explain {
         sql("verbose select substring('123456', 1, 3)")
         contains "varchar(3)"
@@ -71,7 +74,7 @@ suite("fold_constant_by_be") {
                     col_varchar_1000__undef_signed varchar(1000)  null  ,
                     col_varchar_1000__undef_signed_not_null varchar(1000)  not 
null  ,
                     col_varchar_1001__undef_signed varchar(1001)  null  ,
-                    col_varchar_1001__undef_signed_not_null varchar(1001)  not 
null  
+                    col_varchar_1001__undef_signed_not_null varchar(1001)  not 
null
                     ) engine=olap
                     DUPLICATE KEY(pk, col_char_255__undef_signed, 
col_char_100__undef_signed)
                     distributed by hash(pk) buckets 10


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to