This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new b0b6a9e9888 branch-4.0: [fix](skew_join)should make skewExpr and skewValues have same datatype #58941 (#59004)
b0b6a9e9888 is described below

commit b0b6a9e9888aa63de1aca187b78717261a6f0788
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 17 14:49:21 2025 +0800

    branch-4.0: [fix](skew_join)should make skewExpr and skewValues have same datatype #58941 (#59004)
    
    Cherry-picked from #58941
    
    Co-authored-by: starocean999 <[email protected]>
---
 .../doris/nereids/rules/rewrite/SkewJoin.java      |   9 +-
 .../nereids_rules_p0/skew_join/skew_join.groovy    | 137 +++++++++++++++++++++
 2 files changed, 145 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java
index dc75dd9f362..f017ec99a19 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java
@@ -29,6 +29,8 @@ import org.apache.doris.nereids.trees.plans.AbstractPlan;
 import org.apache.doris.nereids.trees.plans.DistributeType;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.util.TypeCoercionUtils;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.SessionVariable;
 
@@ -108,8 +110,13 @@ public class SkewJoin extends OneRewriteRuleFactory {
             join.setHint(hint);
             return join;
         } else {
+            List<Expression> newHotValues = new ArrayList<>(hotValues.size());
+            DataType dataType = skewExpr.getDataType();
+            for (Expression value : hotValues) {
+                newHotValues.add(TypeCoercionUtils.castIfNotSameType(value, dataType));
+            }
             DistributeHint hint = new DistributeHint(DistributeType.SHUFFLE_RIGHT,
-                    new JoinSkewInfo(skewExpr, hotValues, false));
+                    new JoinSkewInfo(skewExpr, newHotValues, false));
             join.setHint(hint);
             return SaltJoin.transform(join);
         }
diff --git a/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy b/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy
index c0b3e00e658..a13fb38e828 100644
--- a/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy
+++ b/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy
@@ -50,4 +50,141 @@ suite("skew_join") {
     sql "alter table t2 modify column id set stats ('row_count' = '10000');"
     qt_bc_shape "explain shape plan select * from t2 join t1 on t1.id=t2.id;"
     qt_bc_exe "select * from t2 join t1 on t1.id=t2.id;"
+
+    multi_sql '''
+    drop table if exists table_20_undef_partitions2_keys3_properties4_distributed_by5;
+    drop table if exists table_30_undef_partitions2_keys3_properties4_distributed_by5;
+    drop table if exists table_30_undef_partitions2_keys3_properties4_distributed_by52;
+    drop table if exists table_50_undef_partitions2_keys3_properties4_distributed_by5;
+    drop table if exists table_100_undef_partitions2_keys3_properties4_distributed_by5;
+    drop table if exists table_100_undef_partitions2_keys3_properties4_distributed_by52;
+    drop table if exists table_200_undef_partitions2_keys3_properties4_distributed_by5;
+    drop table if exists table_200_undef_partitions2_keys3_properties4_distributed_by52;
+
+    create table table_20_undef_partitions2_keys3_properties4_distributed_by5 (
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed2)
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_20_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,6,4,-3343259,7),(1,null,2,-5659896,0),(2,2,2369913,-5247778,-4711382),(3,6545002,3,2,4),(4,9,3,4,5),(5,4,5,4,1),(6,4,-4704791,null,6),(7,null,3,null,9),(8,-1012411,4,null,-1244656),(9,1,8,9,-5175872),(10,8,0,-4239951,2),(11,8,-2231762,4817469,2),(12,9,9,5,-427963),(13,4,0,null,-5587539),(14,-5949786,2,2,3432246),
 [...]
+
+    create table table_30_undef_partitions2_keys3_properties4_distributed_by5 (
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed2)
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_30_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,2,null,0,null),(1,-242819,2983243,7071252,3),(2,1,-2342407,-1423905,8),(3,null,null,7,4),(4,-1494065,3,7,2),(5,5,0,-595225,5),(6,5,-3324113,0,5),(7,6829192,3527453,6,5436506),(8,1,-3189592,2,9),(9,null,2,6,2),(10,-4070807,null,-3324205,7),(11,8,-5293967,1,-5040205),(12,6,7440524,null,null),(13,null,2,9,5),(14,4,n
 [...]
+
+    create table table_30_undef_partitions2_keys3_properties4_distributed_by52 (
+    col_int_undef_signed int    ,
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed, col_int_undef_signed2)
+    PARTITION BY             RANGE(col_int_undef_signed) (
+                    PARTITION p0 VALUES LESS THAN ('4'),
+                    PARTITION p1 VALUES LESS THAN ('6'),
+                    PARTITION p2 VALUES LESS THAN ('7'),
+                    PARTITION p3 VALUES LESS THAN ('8'),
+                    PARTITION p4 VALUES LESS THAN ('10'),
+                    PARTITION p5 VALUES LESS THAN ('83647'),
+                    PARTITION p100 VALUES LESS THAN ('2147483647')
+                )
+            
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_30_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,9,9,null,1),(1,6821639,9,null,-5431086),(2,8,4,6,7701043),(3,2,-6700938,1425835,7),(4,null,1,3,4),(5,8,8,-714745,null),(6,7,3,4447765,null),(7,1,-2101501,0,5),(8,7,0,9,6),(9,4696294,3,2,-3197661),(10,8,4600901,8,1),(11,-1042936,null,-2187191,0),(12,5116430,0,2687672,9),(13,3,3,8,1287742),(14,-3829647,3,4,7510940
 [...]
+
+    create table table_50_undef_partitions2_keys3_properties4_distributed_by5 (
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed2)
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_50_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,8,0,3,7),(1,6,227612,4,8),(2,-590975,9,-4411568,6),(3,-7241036,null,3,5),(4,1,7,null,8),(5,2509741,5,5,1),(6,2,9,null,4817793),(7,6,8,3,0),(8,null,1,4,null),(9,711269,null,-613109,null),(10,null,7,0,7),(11,null,-5534845,0,4),(12,5,2,9,6850777),(13,-5789051,8,6,2463068),(14,2,5,953451,1),(15,-6229147,-6738861,4,0)
 [...]
+
+    create table table_100_undef_partitions2_keys3_properties4_distributed_by5 (
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed2)
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_100_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,3,7164641,5,8),(1,null,3916062,5,6),(2,1,5533498,0,9),(3,7,2,null,7057679),(4,1,0,7,7),(5,null,4,2448564,1),(6,7531976,7324373,9,7),(7,3,1,1,3),(8,6,8131576,9,-1793807),(9,9,2,4214547,9),(10,-7299852,5,1,3),(11,7,3,-1036551,5),(12,-6108579,84823,4,1229534),(13,-1065629,5,4,null),(14,null,8072633,3328285,2),(15,2
 [...]
+
+    create table table_100_undef_partitions2_keys3_properties4_distributed_by52 (
+    col_int_undef_signed int    ,
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed, col_int_undef_signed2)
+    PARTITION BY             RANGE(col_int_undef_signed) (
+                    PARTITION p0 VALUES LESS THAN ('4'),
+                    PARTITION p1 VALUES LESS THAN ('6'),
+                    PARTITION p2 VALUES LESS THAN ('7'),
+                    PARTITION p3 VALUES LESS THAN ('8'),
+                    PARTITION p4 VALUES LESS THAN ('10'),
+                    PARTITION p5 VALUES LESS THAN ('83647'),
+                    PARTITION p100 VALUES LESS THAN ('2147483647')
+                )
+            
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_100_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,7865838,-348902,null,8),(1,-9434,9,8,0),(2,1845860,6675073,-7931956,-66007),(3,-7523286,210291,3,4),(4,null,-1341350,-5318642,1),(5,-6634226,2179558,2,7),(6,2,7,2,3),(7,9,2,3,-7773846),(8,0,8,6,2407384),(9,0,1,7,7),(10,5,5,null,8),(11,9,null,8283010,6),(12,7359987,5145929,2,5),(13,0,5225949,0,6770846),(14,1,454
 [...]
+
+    create table table_200_undef_partitions2_keys3_properties4_distributed_by5 (
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed2)
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_200_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,null,7,3,9),(1,6970022,9,6,2),(2,null,0,null,7262031),(3,4,6,null,7236151),(4,789682,7324018,5,5),(5,-2056178,9,0,0),(6,-7081969,-2103366,0,1),(7,3,5,3,3),(8,3175437,4,6,-2017026),(9,3,null,null,7),(10,-5725039,5,2,3),(11,8,9,2,5),(12,-6487649,1,5,-2847073),(13,3415118,null,4,-6786736),(14,null,4,7,1),(15,995946
 [...]
+
+    create table table_200_undef_partitions2_keys3_properties4_distributed_by52 (
+    col_int_undef_signed int    ,
+    col_int_undef_signed2 int    ,
+    col_int_undef_signed3 int    ,
+    col_int_undef_signed4 int    ,
+    pk int
+    ) engine=olap
+    DUPLICATE KEY(col_int_undef_signed, col_int_undef_signed2)
+    PARTITION BY             RANGE(col_int_undef_signed) (
+                    PARTITION p0 VALUES LESS THAN ('4'),
+                    PARTITION p1 VALUES LESS THAN ('6'),
+                    PARTITION p2 VALUES LESS THAN ('7'),
+                    PARTITION p3 VALUES LESS THAN ('8'),
+                    PARTITION p4 VALUES LESS THAN ('10'),
+                    PARTITION p5 VALUES LESS THAN ('83647'),
+                    PARTITION p100 VALUES LESS THAN ('2147483647')
+                )
+            
+    distributed by hash(pk) buckets 10
+    properties("replication_num" = "1");
+    insert into 
table_200_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
 values 
(0,null,6178782,4,-1498997),(1,null,null,2,4),(2,8,6,6114625,6840353),(3,6,-3487226,4,-18364),(4,6647558,0,7,4),(5,5,1,3,3991803),(6,null,3,3,6),(7,-1597140,3,3,2),(8,6415967,null,9,null),(9,0,2,-1569216,8263281),(10,2546741,4,-4334118,8),(11,2375117,5,null,-3767162),(12,4,290235,null,6),(13,5569849,8,6,null),(14,
 [...]
+    '''
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to