This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new b0b6a9e9888 branch-4.0: [fix](skew_join)should make skewExpr and
skewValues have same datatype #58941 (#59004)
b0b6a9e9888 is described below
commit b0b6a9e9888aa63de1aca187b78717261a6f0788
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 17 14:49:21 2025 +0800
branch-4.0: [fix](skew_join)should make skewExpr and skewValues have same
datatype #58941 (#59004)
Cherry-picked from #58941
Co-authored-by: starocean999 <[email protected]>
---
.../doris/nereids/rules/rewrite/SkewJoin.java | 9 +-
.../nereids_rules_p0/skew_join/skew_join.groovy | 137 +++++++++++++++++++++
2 files changed, 145 insertions(+), 1 deletion(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java
index dc75dd9f362..f017ec99a19 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SkewJoin.java
@@ -29,6 +29,8 @@ import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.DistributeType;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.util.TypeCoercionUtils;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
@@ -108,8 +110,13 @@ public class SkewJoin extends OneRewriteRuleFactory {
join.setHint(hint);
return join;
} else {
+ List<Expression> newHotValues = new ArrayList<>(hotValues.size());
+ DataType dataType = skewExpr.getDataType();
+ for (Expression value : hotValues) {
+ newHotValues.add(TypeCoercionUtils.castIfNotSameType(value,
dataType));
+ }
DistributeHint hint = new
DistributeHint(DistributeType.SHUFFLE_RIGHT,
- new JoinSkewInfo(skewExpr, hotValues, false));
+ new JoinSkewInfo(skewExpr, newHotValues, false));
join.setHint(hint);
return SaltJoin.transform(join);
}
diff --git a/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy b/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy
index c0b3e00e658..a13fb38e828 100644
--- a/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy
+++ b/regression-test/suites/nereids_rules_p0/skew_join/skew_join.groovy
@@ -50,4 +50,141 @@ suite("skew_join") {
sql "alter table t2 modify column id set stats ('row_count' = '10000');"
qt_bc_shape "explain shape plan select * from t2 join t1 on t1.id=t2.id;"
qt_bc_exe "select * from t2 join t1 on t1.id=t2.id;"
+
+ multi_sql '''
+ drop table if exists
table_20_undef_partitions2_keys3_properties4_distributed_by5;
+ drop table if exists
table_30_undef_partitions2_keys3_properties4_distributed_by5;
+ drop table if exists
table_30_undef_partitions2_keys3_properties4_distributed_by52;
+ drop table if exists
table_50_undef_partitions2_keys3_properties4_distributed_by5;
+ drop table if exists
table_100_undef_partitions2_keys3_properties4_distributed_by5;
+ drop table if exists
table_100_undef_partitions2_keys3_properties4_distributed_by52;
+ drop table if exists
table_200_undef_partitions2_keys3_properties4_distributed_by5;
+ drop table if exists
table_200_undef_partitions2_keys3_properties4_distributed_by52;
+
+ create table table_20_undef_partitions2_keys3_properties4_distributed_by5 (
+ col_int_undef_signed2 int ,
+ col_int_undef_signed int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed2)
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_20_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,6,4,-3343259,7),(1,null,2,-5659896,0),(2,2,2369913,-5247778,-4711382),(3,6545002,3,2,4),(4,9,3,4,5),(5,4,5,4,1),(6,4,-4704791,null,6),(7,null,3,null,9),(8,-1012411,4,null,-1244656),(9,1,8,9,-5175872),(10,8,0,-4239951,2),(11,8,-2231762,4817469,2),(12,9,9,5,-427963),(13,4,0,null,-5587539),(14,-5949786,2,2,3432246),
[...]
+
+ create table table_30_undef_partitions2_keys3_properties4_distributed_by5 (
+ col_int_undef_signed2 int ,
+ col_int_undef_signed int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed2)
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_30_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,2,null,0,null),(1,-242819,2983243,7071252,3),(2,1,-2342407,-1423905,8),(3,null,null,7,4),(4,-1494065,3,7,2),(5,5,0,-595225,5),(6,5,-3324113,0,5),(7,6829192,3527453,6,5436506),(8,1,-3189592,2,9),(9,null,2,6,2),(10,-4070807,null,-3324205,7),(11,8,-5293967,1,-5040205),(12,6,7440524,null,null),(13,null,2,9,5),(14,4,n
[...]
+
+ create table table_30_undef_partitions2_keys3_properties4_distributed_by52
(
+ col_int_undef_signed int ,
+ col_int_undef_signed2 int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed, col_int_undef_signed2)
+ PARTITION BY RANGE(col_int_undef_signed) (
+ PARTITION p0 VALUES LESS THAN ('4'),
+ PARTITION p1 VALUES LESS THAN ('6'),
+ PARTITION p2 VALUES LESS THAN ('7'),
+ PARTITION p3 VALUES LESS THAN ('8'),
+ PARTITION p4 VALUES LESS THAN ('10'),
+ PARTITION p5 VALUES LESS THAN ('83647'),
+ PARTITION p100 VALUES LESS THAN ('2147483647')
+ )
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_30_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,9,9,null,1),(1,6821639,9,null,-5431086),(2,8,4,6,7701043),(3,2,-6700938,1425835,7),(4,null,1,3,4),(5,8,8,-714745,null),(6,7,3,4447765,null),(7,1,-2101501,0,5),(8,7,0,9,6),(9,4696294,3,2,-3197661),(10,8,4600901,8,1),(11,-1042936,null,-2187191,0),(12,5116430,0,2687672,9),(13,3,3,8,1287742),(14,-3829647,3,4,7510940
[...]
+
+ create table table_50_undef_partitions2_keys3_properties4_distributed_by5 (
+ col_int_undef_signed2 int ,
+ col_int_undef_signed int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed2)
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_50_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,8,0,3,7),(1,6,227612,4,8),(2,-590975,9,-4411568,6),(3,-7241036,null,3,5),(4,1,7,null,8),(5,2509741,5,5,1),(6,2,9,null,4817793),(7,6,8,3,0),(8,null,1,4,null),(9,711269,null,-613109,null),(10,null,7,0,7),(11,null,-5534845,0,4),(12,5,2,9,6850777),(13,-5789051,8,6,2463068),(14,2,5,953451,1),(15,-6229147,-6738861,4,0)
[...]
+
+ create table table_100_undef_partitions2_keys3_properties4_distributed_by5
(
+ col_int_undef_signed2 int ,
+ col_int_undef_signed int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed2)
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_100_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,3,7164641,5,8),(1,null,3916062,5,6),(2,1,5533498,0,9),(3,7,2,null,7057679),(4,1,0,7,7),(5,null,4,2448564,1),(6,7531976,7324373,9,7),(7,3,1,1,3),(8,6,8131576,9,-1793807),(9,9,2,4214547,9),(10,-7299852,5,1,3),(11,7,3,-1036551,5),(12,-6108579,84823,4,1229534),(13,-1065629,5,4,null),(14,null,8072633,3328285,2),(15,2
[...]
+
+ create table
table_100_undef_partitions2_keys3_properties4_distributed_by52 (
+ col_int_undef_signed int ,
+ col_int_undef_signed2 int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed, col_int_undef_signed2)
+ PARTITION BY RANGE(col_int_undef_signed) (
+ PARTITION p0 VALUES LESS THAN ('4'),
+ PARTITION p1 VALUES LESS THAN ('6'),
+ PARTITION p2 VALUES LESS THAN ('7'),
+ PARTITION p3 VALUES LESS THAN ('8'),
+ PARTITION p4 VALUES LESS THAN ('10'),
+ PARTITION p5 VALUES LESS THAN ('83647'),
+ PARTITION p100 VALUES LESS THAN ('2147483647')
+ )
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_100_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,7865838,-348902,null,8),(1,-9434,9,8,0),(2,1845860,6675073,-7931956,-66007),(3,-7523286,210291,3,4),(4,null,-1341350,-5318642,1),(5,-6634226,2179558,2,7),(6,2,7,2,3),(7,9,2,3,-7773846),(8,0,8,6,2407384),(9,0,1,7,7),(10,5,5,null,8),(11,9,null,8283010,6),(12,7359987,5145929,2,5),(13,0,5225949,0,6770846),(14,1,454
[...]
+
+ create table table_200_undef_partitions2_keys3_properties4_distributed_by5
(
+ col_int_undef_signed2 int ,
+ col_int_undef_signed int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed2)
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_200_undef_partitions2_keys3_properties4_distributed_by5(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,null,7,3,9),(1,6970022,9,6,2),(2,null,0,null,7262031),(3,4,6,null,7236151),(4,789682,7324018,5,5),(5,-2056178,9,0,0),(6,-7081969,-2103366,0,1),(7,3,5,3,3),(8,3175437,4,6,-2017026),(9,3,null,null,7),(10,-5725039,5,2,3),(11,8,9,2,5),(12,-6487649,1,5,-2847073),(13,3415118,null,4,-6786736),(14,null,4,7,1),(15,995946
[...]
+
+ create table
table_200_undef_partitions2_keys3_properties4_distributed_by52 (
+ col_int_undef_signed int ,
+ col_int_undef_signed2 int ,
+ col_int_undef_signed3 int ,
+ col_int_undef_signed4 int ,
+ pk int
+ ) engine=olap
+ DUPLICATE KEY(col_int_undef_signed, col_int_undef_signed2)
+ PARTITION BY RANGE(col_int_undef_signed) (
+ PARTITION p0 VALUES LESS THAN ('4'),
+ PARTITION p1 VALUES LESS THAN ('6'),
+ PARTITION p2 VALUES LESS THAN ('7'),
+ PARTITION p3 VALUES LESS THAN ('8'),
+ PARTITION p4 VALUES LESS THAN ('10'),
+ PARTITION p5 VALUES LESS THAN ('83647'),
+ PARTITION p100 VALUES LESS THAN ('2147483647')
+ )
+
+ distributed by hash(pk) buckets 10
+ properties("replication_num" = "1");
+ insert into
table_200_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values
(0,null,6178782,4,-1498997),(1,null,null,2,4),(2,8,6,6114625,6840353),(3,6,-3487226,4,-18364),(4,6647558,0,7,4),(5,5,1,3,3991803),(6,null,3,3,6),(7,-1597140,3,3,2),(8,6415967,null,9,null),(9,0,2,-1569216,8263281),(10,2546741,4,-4334118,8),(11,2375117,5,null,-3767162),(12,4,290235,null,6),(13,5569849,8,6,null),(14,
[...]
+ '''
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]