This is an automated email from the ASF dual-hosted git repository. liyang pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 46e6637071172e287aaf1b630aad7989952e3549 Author: Pengfei.Zhan <pengfei.z...@kyligence.io> AuthorDate: Thu Nov 23 18:57:28 2023 +0800 KYLIN-5878 ReduceExpressionRule does not work for OlapRel Calcite sometimes gives an inappropriate cost in VolcanoPlanner, so let the ReduceExpressionRule run again in post-optimization so that SegmentPruningRule and FilePruner can work. ReduceExpressionRule can only work for LogicalProject; however, OlapProjectRel extends Project. Therefore, the conversions that use these rules in the ReduceExpressionRule don't work. This pull request defines some rules in Kylin to override the default ones. --- pom.xml | 2 +- .../kylin/query/rules/FilterSimplifyRuleTest.java | 4 + .../query/rules/ScalarSubqueryJoinRuleTest.java | 2 + .../kylin/query/rules/AggPushDownRuleTest.xml | 17 ++- .../kylin/query/rules/FilterSimplifyRuleTest.xml | 4 +- .../query/rules/ScalarSubqueryJoinRuleTest.xml | 18 +-- .../kylin/query/routing/SegmentPruningRule.java | 168 +-------------------- .../java/org/apache/kylin/query/util/RexUtils.java | 10 +- .../apache/kylin/util/FilterConditionExpander.java | 14 +- .../apache/kylin/query/engine/PlannerFactory.java | 16 +- .../org/apache/kylin/query/engine/QueryExec.java | 14 ++ .../kylin/query/optrule/OlapProjectMergeRule.java | 41 ++++- .../query/optrule/OlapReduceExpressionRule.java | 47 ++++++ 13 files changed, 160 insertions(+), 197 deletions(-) diff --git a/pom.xml b/pom.xml index 091ee90583..a0cd96eef0 100644 --- a/pom.xml +++ b/pom.xml @@ -138,7 +138,7 @@ <scala-retry>0.3.0</scala-retry> <!-- Calcite Version --> - <calcite.version>1.116.0-kylin-4.x-r038</calcite.version> + <calcite.version>1.116.0-kylin-4.x-r039</calcite.version> <avatica.version>4.x_1.10-r01</avatica.version> <!-- Hadoop Common deps, keep compatible with hadoop2.version --> diff --git a/src/kylin-it/src/test/java/org/apache/kylin/query/rules/FilterSimplifyRuleTest.java 
b/src/kylin-it/src/test/java/org/apache/kylin/query/rules/FilterSimplifyRuleTest.java index 000d6ef677..c55195beb7 100644 --- a/src/kylin-it/src/test/java/org/apache/kylin/query/rules/FilterSimplifyRuleTest.java +++ b/src/kylin-it/src/test/java/org/apache/kylin/query/rules/FilterSimplifyRuleTest.java @@ -28,6 +28,8 @@ import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Pair; import org.apache.kylin.query.optrule.FilterJoinConditionMergeRule; import org.apache.kylin.query.optrule.FilterSimplifyRule; +import org.apache.kylin.query.optrule.OlapFilterRule; +import org.apache.kylin.query.optrule.OlapReduceExpressionRule; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -56,8 +58,10 @@ public class FilterSimplifyRuleTest extends CalciteRuleTestBase { @Test public void test() throws IOException { final List<RelOptRule> rules = new ArrayList<>(); + rules.add(OlapReduceExpressionRule.FILTER_INSTANCE); rules.add(FilterSimplifyRule.INSTANCE); rules.add(FilterJoinConditionMergeRule.INSTANCE); + rules.add(OlapFilterRule.INSTANCE); List<Pair<String, String>> queries = readALLSQLs(KylinConfig.getInstanceFromEnv(), defaultProject, "query/sql_filter_simplify"); diff --git a/src/kylin-it/src/test/java/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.java b/src/kylin-it/src/test/java/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.java index e8e733a61b..356c71e76a 100644 --- a/src/kylin-it/src/test/java/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.java +++ b/src/kylin-it/src/test/java/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.java @@ -32,6 +32,7 @@ import org.apache.kylin.query.optrule.AggregateProjectReduceRule; import org.apache.kylin.query.optrule.OlapAggregateRule; import org.apache.kylin.query.optrule.OlapJoinRule; import org.apache.kylin.query.optrule.OlapProjectRule; +import org.apache.kylin.query.optrule.OlapReduceExpressionRule; import 
org.apache.kylin.query.optrule.ScalarSubqueryJoinRule; import org.junit.After; import org.junit.Before; @@ -104,6 +105,7 @@ public class ScalarSubqueryJoinRuleTest extends CalciteRuleTestBase { ProjectMergeRule.INSTANCE, // AggregateProjectMergeRule.INSTANCE, // AggregateProjectReduceRule.INSTANCE, // + OlapReduceExpressionRule.PROJECT_INSTANCE, // // target rules ScalarSubqueryJoinRule.AGG_JOIN, // ScalarSubqueryJoinRule.AGG_PRJ_JOIN, // diff --git a/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/AggPushDownRuleTest.xml b/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/AggPushDownRuleTest.xml index a1e346e839..4d239c901d 100644 --- a/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/AggPushDownRuleTest.xml +++ b/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/AggPushDownRuleTest.xml @@ -39,7 +39,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0]], groups=[null], usr_GMV_SUM_ok=[SUM($1)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(<($0, 164000), '[0,164000)', <($0, 166000), '[164000, 166000)', '[166000, +∞')], PRICE=[$1], ctx=[]) OlapAggregateRel(group-set=[[1]], groups=[null], usr_GMV_SUM_ok=[SUM($2)], ctx=[]) - OlapFilterRel(condition=[NOT(=($0, 'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($0, 'Auction')], ctx=[]) OlapProjectRel(LSTG_FORMAT_NAME=[$0], X_measure__0=[$3], usr_GMV_SUM_ok=[$1], ctx=[]) OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[3]], groups=[null], usr_GMV_SUM_ok=[SUM($8)], ctx=[]) @@ -48,6 +48,7 @@ OlapToEnumerableConverter OlapProjectRel(LSTG_FORMAT_NAME=[$3], PRICE=[$8], ctx=[]) OlapFilterRel(condition=[NOT(=($3, 'Auction'))], ctx=[]) OlapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]) + ]]> </Resource> <Resource name="query02.planBefore"> @@ -70,7 +71,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0, 1]], groups=[null], 
usr_GMV_SUM_ok=[SUM($2)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(<($1, 164000), '[0,164000)', <($1, 166000), '[164000, 166000)', '[166000, +∞')], LSTG_FORMAT_NAME=[$0], PRICE=[$2], ctx=[]) OlapAggregateRel(group-set=[[1, 2]], groups=[null], usr_GMV_SUM_ok=[SUM($3)], ctx=[]) - OlapFilterRel(condition=[NOT(=($0, 'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($0, 'Auction')], ctx=[]) OlapProjectRel(LSTG_FORMAT_NAME=[$0], LSTG_FORMAT_NAME0=[$2], X_measure__0=[$3], usr_GMV_SUM_ok=[$1], ctx=[]) OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[3]], groups=[null], usr_GMV_SUM_ok=[SUM($8)], ctx=[]) @@ -264,7 +265,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0]], groups=[null], usr_GMV_SUM_ok=[SUM($1)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(<($0, 164000), '[0,164000)', <($0, 166000), '[164000, 166000)', '[166000, +∞')], $1=[$1], ctx=[]) OlapAggregateRel(group-set=[[1]], groups=[null], usr_GMV_SUM_ok=[SUM($2)], ctx=[]) - OlapFilterRel(condition=[NOT(=($0, 'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($0, 'Auction')], ctx=[]) OlapProjectRel(LSTG_FORMAT_NAME=[$0], X_measure__0=[$3], usr_GMV_SUM_ok=[$1], ctx=[]) OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[3]], groups=[null], usr_GMV_SUM_ok=[SUM($8)], ctx=[]) @@ -295,7 +296,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0, 1]], groups=[null], usr_GMV_SUM_ok=[SUM($2)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(<($1, 164000), '[0,164000)', <($1, 166000), '[164000, 166000)', '[166000, +∞')], LSTG_FORMAT_NAME=[$0], $2=[$2], ctx=[]) OlapAggregateRel(group-set=[[1, 2]], groups=[null], usr_GMV_SUM_ok=[SUM($3)], ctx=[]) - OlapFilterRel(condition=[NOT(=($0, 'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($0, 'Auction')], ctx=[]) OlapProjectRel(LSTG_FORMAT_NAME=[$0], LSTG_FORMAT_NAME0=[$2], X_measure__0=[$3], usr_GMV_SUM_ok=[$1], ctx=[]) 
OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[3]], groups=[null], usr_GMV_SUM_ok=[SUM($8)], ctx=[]) @@ -326,7 +327,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0]], groups=[null], usr_GMV_SUM_ok=[SUM($1)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(<($0, 164000), '[0,164000)', <($0, 166000), '[164000, 166000)', '[166000, +∞')], $1=[$1], ctx=[]) OlapAggregateRel(group-set=[[0]], groups=[null], usr_GMV_SUM_ok=[SUM($2)], ctx=[]) - OlapFilterRel(condition=[NOT(=($1, 'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($1, 'Auction')], ctx=[]) OlapProjectRel(X_measure__0=[$1], LSTG_FORMAT_NAME=[$2], usr_GMV_SUM_ok=[$3], ctx=[]) OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[0]], groups=[null], X_measure__0=[SUM($1)], ctx=[]) @@ -361,7 +362,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0]], groups=[null], usr_GMV_SUM_ok=[SUM($1)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(<($0, 164000), '[0,164000)', <($0, 166000), '[164000, 166000)', '[166000, +∞')], $1=[$1], ctx=[]) OlapAggregateRel(group-set=[[1]], groups=[null], usr_GMV_SUM_ok=[SUM($2)], ctx=[]) - OlapFilterRel(condition=[NOT(=($0, 'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($0, 'Auction')], ctx=[]) OlapProjectRel(LSTG_FORMAT_NAME=[$0], X_measure__0=[$3], usr_GMV_SUM_ok=[$1], ctx=[]) OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[3]], groups=[null], usr_GMV_SUM_ok=[SUM($8)], ctx=[]) @@ -425,7 +426,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0]], groups=[null], usr_GMV_SUM_ok=[SUM($1)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(AND(<($1, 164000), =($0, 'ABIN')), '[0,164000)', AND(<($1, 166000), =($0, 'ABIN')), '[164000, 166000)', '[166000, +∞')], $2=[$2], ctx=[]) OlapAggregateRel(group-set=[[1, 2]], groups=[null], usr_GMV_SUM_ok=[SUM($3)], ctx=[]) - OlapFilterRel(condition=[NOT(=($0, 
'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($0, 'Auction')], ctx=[]) OlapProjectRel(LSTG_FORMAT_NAME=[$0], LSTG_FORMAT_NAME0=[$2], X_measure__0=[$3], usr_GMV_SUM_ok=[$1], ctx=[]) OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[3]], groups=[null], usr_GMV_SUM_ok=[SUM($8)], ctx=[]) @@ -456,7 +457,7 @@ OlapToEnumerableConverter OlapAggregateRel(group-set=[[0]], groups=[null], usr_GMV_COUNT_ok=[$SUM0($1)], ctx=[]) OlapProjectRel(Calculation_1162491713472385025=[CASE(<($0, 164000), '[0,164000)', <($0, 166000), '[164000, 166000)', '[166000, +∞')], $1=[$1], ctx=[]) OlapAggregateRel(group-set=[[1]], groups=[null], usr_GMV_COUNT_ok=[$SUM0($2)], ctx=[]) - OlapFilterRel(condition=[NOT(=($0, 'Auction'))], ctx=[]) + OlapFilterRel(condition=[<>($0, 'Auction')], ctx=[]) OlapProjectRel(LSTG_FORMAT_NAME=[$0], X_measure__0=[$3], usr_GMV_COUNT_ok=[$1], ctx=[]) OlapJoinRel(condition=[=($0, $2)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[3]], groups=[null], usr_GMV_COUNT_ok=[COUNT($8)], ctx=[]) diff --git a/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/FilterSimplifyRuleTest.xml b/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/FilterSimplifyRuleTest.xml index e77c4ead93..24ed9c76ba 100644 --- a/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/FilterSimplifyRuleTest.xml +++ b/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/FilterSimplifyRuleTest.xml @@ -45,7 +45,7 @@ OlapToEnumerableConverter OlapProjectRel(EXPR$0=[$1], ctx=[]) OlapAggregateRel(group-set=[[0]], groups=[null], EXPR$0=[COUNT()], ctx=[]) OlapProjectRel(CAL_DT=[$2], ctx=[]) - LogicalFilter(condition=[AND(>($1, 10), >($2, 1992-01-01))]) + OlapFilterRel(condition=[AND(>($1, 10), >($2, 1992-01-01))], ctx=[]) OlapJoinRel(condition=[=($52, $58)], joinType=[inner], ctx=[]) OlapJoinRel(condition=[=($47, $54)], joinType=[inner], ctx=[]) OlapJoinRel(condition=[=($7, $49)], joinType=[inner], ctx=[]) @@ -77,7 +77,7 @@ 
OlapToEnumerableConverter OlapProjectRel(EXPR$0=[$1], ctx=[]) OlapAggregateRel(group-set=[[0]], groups=[null], EXPR$0=[COUNT()], ctx=[]) OlapProjectRel(CAL_DT=[$2], ctx=[]) - LogicalFilter(condition=[AND(>($2, 1992-01-01), IN(SUBSTRING($3, 1, 1), 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'))]) + OlapFilterRel(condition=[AND(>($2, 1992-01-01), IN(SUBSTRING($3, 1, 1), 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'))], ctx=[]) OlapJoinRel(condition=[=($1, $39)], joinType=[inner], ctx=[]) OlapTableScan(table=[[DEFAULT, TEST_KYLIN_FACT]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38]]) OlapTableScan(table=[[DEFAULT, TEST_ORDER]], ctx=[], fields=[[0, 1, 2, 3, 4]]) diff --git a/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.xml b/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.xml index 9dbf460488..46e750b4b6 100644 --- a/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.xml +++ b/src/kylin-it/src/test/resources/org/apache/kylin/query/rules/ScalarSubqueryJoinRuleTest.xml @@ -32,7 +32,7 @@ OlapToEnumerableConverter <Resource name="query01.planAfter"> <![CDATA[ OlapToEnumerableConverter - OlapProjectRel(D_DATEKEY=[$1], DATE_TIME=[$0], TOTAL=[$2], ctx=[]) + OlapProjectRel(D_DATEKEY=[$1], DATE_TIME=['1995-03-01'], TOTAL=[$2], ctx=[]) OlapAggregateRel(group-set=[[0, 1]], groups=[null], TOTAL=[SUM($2)], ctx=[]) OlapJoinRel(condition=[=($0, $1)], joinType=[left], ctx=[]) OlapProjectRel(DATE_TIME=['1995-03-01'], ctx=[]) @@ -161,7 +161,7 @@ OlapToEnumerableConverter OlapToEnumerableConverter OlapProjectRel(SD=[$3], D_DATE=[$1], EXPR$2=[$4], DATE_TIME=[$0], SY=[$5], SR=[$6], LO_ORDERDATE=[$2], ctx=[]) OlapAggregateRel(group-set=[[0, 1, 2]], groups=[null], SD=[SUM($8)], EXPR$2=[$SUM0($9)], SY=[SUM($10)], SR=[SUM($11)], ctx=[]) - OlapProjectRel(DATE_TIME=[$0], 
D_DATE=[$1], LO_ORDERDATE=[$5], EXPR$2=[$3], SY=[$4], SD=[$2], $f1=[$6], SR=[$7], $f8=[CAST(*($2, $6)):BIGINT], $f9=[*($3, $6)], $f10=[CAST(*($4, $6)):BIGINT], $f11=[CAST(*($3, $7)):BIGINT], ctx=[]) + OlapProjectRel(DATE_TIME=[$0], D_DATE=[$1], LO_ORDERDATE=[$5], EXPR$2=[$3], SY=[$4], SD=[$2], $f1=[$6], SR=[$7], $f8=[*($2, $6)], $f9=[*($3, $6)], $f10=[*($4, $6)], $f11=[*($3, $7)], ctx=[]) OlapJoinRel(condition=[=($0, $5)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[0, 1]], groups=[null], SD=[SUM($2)], EXPR$2=[$SUM0($3)], SY=[SUM($4)], ctx=[]) OlapProjectRel(DATE_TIME=[$0], D_DATE=[$1], SD=[$2], $f3=[CASE(IS NULL($3), CAST(1):BIGINT, $3)], SY=[$4], ctx=[]) @@ -453,7 +453,7 @@ OlapToEnumerableConverter OlapToEnumerableConverter OlapProjectRel(DATE_TIME=[$0], EXPR$1=[$1], EXPR$2=[$2], EXPR$3=[$3], EXPR$4=[$3], EXPR$5=[$4], SY=[$5], SR=[$6], ctx=[]) OlapAggregateRel(group-set=[[0]], groups=[null], EXPR$1=[$SUM0($8)], EXPR$2=[$SUM0($9)], EXPR$3=[$SUM0($10)], EXPR$5=[$SUM0($11)], SY=[SUM($12)], SR=[SUM($13)], ctx=[]) - OlapProjectRel(DATE_TIME=[$0], $f1=[$1], EXPR$3=[$2], SY=[$3], LO_ORDERDATE=[$4], $f10=[$5], $f2=[$6], SR=[$7], $f8=[*($1, $5)], $f9=[*($1, $6)], $f1011=[*($2, $6)], $f11=[*($2, $5)], $f12=[CAST(*($3, $6)):BIGINT], $f13=[CAST(*($2, $7)):BIGINT], ctx=[]) + OlapProjectRel(DATE_TIME=[$0], $f1=[$1], EXPR$3=[$2], SY=[$3], LO_ORDERDATE=[$4], $f10=[$5], $f2=[$6], SR=[$7], $f8=[*($1, $5)], $f9=[*($1, $6)], $f1011=[*($2, $6)], $f11=[*($2, $5)], $f12=[*($3, $6)], $f13=[*($2, $7)], ctx=[]) OlapJoinRel(condition=[=($0, $4)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[0]], groups=[null], agg#0=[$SUM0($2)], EXPR$3=[$SUM0($3)], SY=[SUM($4)], ctx=[]) OlapProjectRel(DATE_TIME=[$0], D_DATE=[$1], $f2=[CASE(IS NULL($2), CAST(0):BIGINT, $2)], $f3=[CASE(IS NULL($3), CAST(1):BIGINT, $3)], SY=[$4], ctx=[]) @@ -662,7 +662,7 @@ OlapToEnumerableConverter <Resource name="query17.planAfter"> <![CDATA[ OlapToEnumerableConverter - OlapProjectRel(C_NAME=[$1], 
COUNT_ORDER=[$3], LO_ORDERDATE=[$0], DATE_TIME=[$2], SUM_CUSTKEY=[$4], ctx=[]) + OlapProjectRel(C_NAME=[$1], COUNT_ORDER=[$3], LO_ORDERDATE=[CAST('1996-01-02'):INTEGER], DATE_TIME=['1996-01-02'], SUM_CUSTKEY=[$4], ctx=[]) OlapAggregateRel(group-set=[[0, 1, 4]], groups=[null], COUNT_ORDER=[$SUM0($2)], SUM_CUSTKEY=[SUM($3)], ctx=[]) OlapJoinRel(condition=[=($0, $4)], joinType=[inner], ctx=[]) OlapAggregateRel(group-set=[[5, 22]], groups=[null], COUNT_ORDER=[COUNT($0)], SUM_CUSTKEY=[SUM($21)], ctx=[]) @@ -714,9 +714,9 @@ OlapToEnumerableConverter <Resource name="query18.planAfter"> <![CDATA[ OlapToEnumerableConverter - OlapProjectRel(C_NAME=[$2], COUNT_ORDER=[$4], LO_ORDERDATE=[$1], DATE_TIME=[$0], DATE_TIME0=[$3], SUM_CUSTKEY=[$5], ctx=[]) + OlapProjectRel(C_NAME=[$2], COUNT_ORDER=[$4], LO_ORDERDATE=[$1], DATE_TIME=['1996-01-02'], DATE_TIME0=[$3], SUM_CUSTKEY=[$5], ctx=[]) OlapAggregateRel(group-set=[[0, 1, 2, 3]], groups=[null], COUNT_ORDER=[$SUM0($4)], SUM_CUSTKEY=[SUM($5)], ctx=[]) - OlapProjectRel(DATE_TIME=[$0], LO_ORDERDATE=[$1], C_NAME=[$2], DATE_TIME0=[$3], $f4=[CASE(IS NULL($4), CAST(0):BIGINT, $4)], SUM_CUSTKEY=[$5], ctx=[]) + OlapProjectRel(DATE_TIME=['1996-01-02'], LO_ORDERDATE=[$1], C_NAME=[$2], DATE_TIME0=[$3], $f4=[CASE(IS NULL($4), CAST(0):BIGINT, $4)], SUM_CUSTKEY=[$5], ctx=[]) OlapJoinRel(condition=[=($0, $1)], joinType=[left], ctx=[]) OlapProjectRel(DATE_TIME=['1996-01-02'], ctx=[]) OlapValuesRel(tuples=[[{ 0 }]]) @@ -764,9 +764,9 @@ OlapToEnumerableConverter <Resource name="query19.planAfter"> <![CDATA[ OlapToEnumerableConverter - OlapProjectRel(EXPR$0=[$2], LO_ORDERDATE=[$0], DATE_TIME=[$1], ctx=[]) + OlapProjectRel(EXPR$0=[$2], LO_ORDERDATE=[CAST(19931014):INTEGER], DATE_TIME=[19931014], ctx=[]) OlapAggregateRel(group-set=[[0, 1]], groups=[null], EXPR$0=[SUM($2)], ctx=[]) - OlapProjectRel(LO_ORDERDATE=[$5], DATE_TIME=[$18], $f2=[*($9, 3)], ctx=[]) + OlapProjectRel(LO_ORDERDATE=[CAST(19931014):INTEGER], DATE_TIME=[19931014], $f2=[*($9, 3)], 
ctx=[]) OlapJoinRel(condition=[=($5, $18)], joinType=[inner], ctx=[]) OlapTableScan(table=[[SSB, P_LINEORDER]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]]) OlapProjectRel(DATE_TIME=[19931014], ctx=[]) @@ -775,4 +775,4 @@ OlapToEnumerableConverter </Resource> </TestCase> -</Root> \ No newline at end of file +</Root> diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/SegmentPruningRule.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/SegmentPruningRule.java index 0a8793410a..1cd540e406 100644 --- a/src/query-common/src/main/java/org/apache/kylin/query/routing/SegmentPruningRule.java +++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/SegmentPruningRule.java @@ -18,32 +18,17 @@ package org.apache.kylin.query.routing; -import java.util.ArrayList; -import java.util.Calendar; import java.util.List; import java.util.Locale; -import java.util.Set; -import java.util.TimeZone; import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.calcite.plan.RelOptPredicateList; -import org.apache.calcite.rel.type.RelDataTypeFamily; -import org.apache.calcite.rel.type.RelDataTypeSystem; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; -import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexSimplify; -import org.apache.calcite.sql.SqlCollation; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.type.BasicSqlType; -import org.apache.calcite.sql.type.SqlTypeFamily; -import org.apache.calcite.sql.type.SqlTypeName; -import org.apache.calcite.util.DateString; -import org.apache.calcite.util.NlsString; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.time.DateUtils; import org.apache.kylin.common.KylinConfig; @@ -51,7 +36,6 @@ 
import org.apache.kylin.common.exception.KylinRuntimeException; import org.apache.kylin.common.exception.KylinTimeoutException; import org.apache.kylin.common.util.DateFormat; import org.apache.kylin.common.util.Pair; -import org.apache.kylin.guava30.shaded.common.collect.ImmutableSet; import org.apache.kylin.guava30.shaded.common.collect.Lists; import org.apache.kylin.metadata.cube.cuboid.NLayoutCandidate; import org.apache.kylin.metadata.cube.model.NDataSegment; @@ -80,18 +64,10 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class SegmentPruningRule extends PruningRule { - private static final TimeZone UTC_ZONE = TimeZone.getTimeZone("UTC"); - private static final Pattern DATE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); private static final Pattern TIMESTAMP_PATTERN = Pattern .compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}(\\.\\d*[1-9])?"); - public static final Set<SqlKind> COMPARISON_OP_KIND_SET = ImmutableSet.of(SqlKind.GREATER_THAN, - SqlKind.GREATER_THAN_OR_EQUAL, // - SqlKind.LESS_THAN, SqlKind.LESS_THAN_OR_EQUAL, // - SqlKind.IN, SqlKind.NOT_IN, // - SqlKind.EQUALS, SqlKind.NOT_EQUALS); - @Override public void apply(Candidate candidate) { List<IRealization> realizations = candidate.getRealization().getRealizations(); @@ -127,38 +103,11 @@ public class SegmentPruningRule extends PruningRule { // pruner segment by simplify sql filter val relOptCluster = olapContext.getFirstTableScan().getCluster(); - val rexBuilder = relOptCluster.getRexBuilder(); val rexSimplify = new RexSimplify(relOptCluster.getRexBuilder(), RelOptPredicateList.EMPTY, true, relOptCluster.getPlanner().getExecutor()); - var filterConditions = olapContext.getExpandedFilterConditions(); - val dateFormat = partitionCol.getPartitionDateFormat(); - val partitionColRef = partitionCol.getPartitionDateColumnRef(); - RexInputRef partitionColInputRef = null; - if (needRewritePartitionColInFilter(dataflow, olapContext)) { - partitionColInputRef = 
RexUtils.transformColumn2RexInputRef(partitionColRef, - olapContext.getAllTableScans()); - try { - val firstSegmentRanges = transformSegment2RexCall(allReadySegments.get(0), dateFormat, rexBuilder, - partitionColInputRef, partitionColRef.getType(), dataflow.isStreaming()); - RelDataTypeFamily segmentLiteralTypeFamily = getSegmentLiteralTypeFamily(firstSegmentRanges.getFirst()); - List<RexNode> filterRexNodeList = new ArrayList<>(); - for (RexNode filterCondition : filterConditions) { - RexNode rexNode = rewriteRexCall(filterCondition, rexBuilder, segmentLiteralTypeFamily, - partitionColInputRef, dateFormat); - filterRexNodeList.add(rexNode); - } - filterConditions = filterRexNodeList; - } catch (Exception ex) { - log.warn("Segment pruning error: ", ex); - if (canPruneSegmentsForMaxMeasure(dataflow, olapContext, partitionColRef)) { - return selectSegmentsForMaxMeasure(dataflow); - } - return allReadySegments; - } - } - - RexNode simplifiedSqlFilter = rexSimplify.simplifyAnds(filterConditions); + // this step may spend too long time, should be handled by https://olapio.atlassian.net/browse/KE-42295 + RexNode simplifiedSqlFilter = rexSimplify.simplifyAnds(olapContext.getExpandedFilterConditions()); if (simplifiedSqlFilter.isAlwaysFalse()) { log.info("SQL filter condition is always false, pruning all ready segments"); olapContext.getStorageContext().setFilterCondAlwaysFalse(true); @@ -166,6 +115,7 @@ public class SegmentPruningRule extends PruningRule { } // pruner segment by customized scene optimize + val partitionColRef = partitionCol.getPartitionDateColumnRef(); if (canPruneSegmentsForMaxMeasure(dataflow, olapContext, partitionColRef)) { return selectSegmentsForMaxMeasure(dataflow); } @@ -181,6 +131,8 @@ public class SegmentPruningRule extends PruningRule { } // prune segments by partition filter + RexInputRef partitionColInputRef = RexUtils.transformColumn2RexInputRef(partitionColRef, + olapContext.getAllTableScans()); Segments<NDataSegment> selectedSegments = 
pruneSegmentsByPartitionFilter(dataflow, olapContext, rexSimplify, partitionColInputRef, simplifiedSqlFilter); log.info("Scan segment.size: {} after segment pruning", selectedSegments.size()); @@ -239,11 +191,6 @@ public class SegmentPruningRule extends PruningRule { return selectedSegments; } - private boolean needRewritePartitionColInFilter(NDataflow dataflow, OlapContext olapContext) { - return !dataflow.getQueryableSegments().isEmpty() && olapContext.getFilterColumns() - .contains(getPartitionDesc(dataflow, olapContext).getPartitionDateColumnRef()); - } - private boolean isFullBuildModel(PartitionDesc partitionCol) { return PartitionDesc.isEmptyPartitionDesc(partitionCol) || partitionCol.getPartitionDateFormat() == null; } @@ -310,111 +257,6 @@ public class SegmentPruningRule extends PruningRule { .getDataModelDesc(model.getFusionId()).getPartitionDesc(); } - private RexNode rewriteRexCall(RexNode rexNode, RexBuilder rexBuilder, RelDataTypeFamily relDataTypeFamily, - RexInputRef partitionColInputRef, String dateFormat) { - if (!(rexNode instanceof RexCall)) { - return rexNode; - } - - RexCall rewriteRexCall = (RexCall) rexNode; - - if (COMPARISON_OP_KIND_SET.contains(rewriteRexCall.getOperator().kind)) { - return needRewrite(partitionColInputRef, rewriteRexCall) - ? 
rewriteRexNodeLiteral(rexNode, rexBuilder, relDataTypeFamily, dateFormat) - : rexNode; - } else { - List<RexNode> opList = rewriteRexCall.getOperands().stream() - .map(rex -> rewriteRexCall(rex, rexBuilder, relDataTypeFamily, partitionColInputRef, dateFormat)) - .collect(Collectors.toList()); - return rexBuilder.makeCall(rewriteRexCall.getOperator(), opList); - } - } - - private boolean needRewrite(RexInputRef partitionColInputRef, RexCall rewriteRexCall) { - boolean isContainsPartitionColumn = false; - boolean isContainsLiteral = false; - for (RexNode sonRexNode : rewriteRexCall.getOperands()) { - if (sonRexNode instanceof RexInputRef) { - RexInputRef rexInputRef = (RexInputRef) sonRexNode; - String columnName = rexInputRef.getName(); - if (partitionColInputRef.getName().contains(columnName)) { - isContainsPartitionColumn = true; - } - } else if (sonRexNode instanceof RexLiteral) { - isContainsLiteral = true; - } - } - return isContainsPartitionColumn && isContainsLiteral; - } - - private RexNode rewriteRexNodeLiteral(RexNode rexNodeLiteral, RexBuilder rexBuilder, - RelDataTypeFamily relDataTypeFamily, String dateFormat) { - if (rexNodeLiteral instanceof RexCall) { - try { - RexCall rexCall = (RexCall) rexNodeLiteral; - List<RexNode> oldRexNodes = rexCall.getOperands(); - List<RexNode> newRexNodes = new ArrayList<>(); - for (RexNode rexNode : oldRexNodes) { - newRexNodes.add(transform(rexNode, rexBuilder, relDataTypeFamily, dateFormat)); - } - rexNodeLiteral = rexBuilder.makeCall(rexCall.getOperator(), newRexNodes); - } catch (Exception e) { - log.warn("RewriteRexNodeLiteral failed rexNodeLiteral:{} relDataTypeFamily:{} dateFormat:{}", - rexNodeLiteral, relDataTypeFamily.toString(), dateFormat, e); - } - } - return rexNodeLiteral; - } - - private RexNode transform(RexNode rexNode, RexBuilder rexBuilder, RelDataTypeFamily relDataTypeFamily, - String dateFormat) { - if (!(rexNode instanceof RexLiteral)) { - return rexNode; - } - - RexLiteral rexLiteral = 
(RexLiteral) rexNode; - RexNode newLiteral; - if (SqlTypeFamily.DATE == relDataTypeFamily) { - String dateStr = normalization(dateFormat, rexLiteral); - newLiteral = rexBuilder.makeLiteral(new DateString(dateStr), - new BasicSqlType(RelDataTypeSystem.DEFAULT, SqlTypeName.DATE), true); - } else if (SqlTypeFamily.CHARACTER == relDataTypeFamily) { - String dateStr = normalization(dateFormat, rexLiteral); - newLiteral = rexBuilder.makeLiteral(new NlsString(dateStr, "UTF-16LE", SqlCollation.IMPLICIT), - new BasicSqlType(RelDataTypeSystem.DEFAULT, SqlTypeName.CHAR), true); - } else { - newLiteral = rexLiteral; - } - return newLiteral; - } - - private String normalization(String dateFormat, RexLiteral rexLiteral) { - RelDataTypeFamily typeFamily = rexLiteral.getType().getFamily(); - if (SqlTypeFamily.DATE == typeFamily || SqlTypeFamily.TIMESTAMP == typeFamily) { - // Calendar uses UTC timezone, just to keep RexLiteral's value(an instanceof DateString) - long timeInMillis = ((Calendar) rexLiteral.getValue()).getTimeInMillis(); - String dateStr = DateFormat.formatToDateStr(timeInMillis, dateFormat, UTC_ZONE); - if (!rexLiteral.toString().equals(dateStr)) { - log.warn("Normalize RexLiteral({}) to {}", rexLiteral, dateStr); - } - return dateStr; - } - return rexLiteral.getValue2().toString(); - } - - private RelDataTypeFamily getSegmentLiteralTypeFamily(RexNode rangeRexNode) { - if (rangeRexNode instanceof RexCall) { - RexCall rexCall = (RexCall) rangeRexNode; - List<RexNode> oldRexNodes = rexCall.getOperands(); - for (RexNode rexNode : oldRexNodes) { - if (rexNode instanceof RexLiteral) { - return rexNode.getType().getFamily(); - } - } - } - return null; - } - private Pair<RexNode, RexNode> transformSegment2RexCall(NDataSegment dataSegment, String dateFormat, RexBuilder rexBuilder, RexInputRef partitionColInputRef, DataType partitionColType, boolean isStreaming) { String start; diff --git a/src/query-common/src/main/java/org/apache/kylin/query/util/RexUtils.java 
b/src/query-common/src/main/java/org/apache/kylin/query/util/RexUtils.java index 383e0f8739..3209ea7c4b 100644 --- a/src/query-common/src/main/java/org/apache/kylin/query/util/RexUtils.java +++ b/src/query-common/src/main/java/org/apache/kylin/query/util/RexUtils.java @@ -263,6 +263,7 @@ public class RexUtils { public static RexNode transformValue2RexLiteral(RexBuilder rexBuilder, String value, DataType colType) { RelDataType relDataType; + String[] splits; switch (colType.getName()) { case DataType.DATE: // In order to support the column type is date, but the value is timestamp string. @@ -270,12 +271,17 @@ public class RexUtils { // the filter condition is: cast("cal_dt" as timestamp) >= timestamp '2012-01-01 00:00:00', // the FilterConditionExpander will translate it to compare CAL_DT >= date '2012-01-01' // This seems like an unsafe operation. - String[] splits = StringUtils.split(value.trim(), " "); + splits = StringUtils.split(value.trim(), " "); Preconditions.checkArgument(splits.length >= 1, "split %s with error", value); return rexBuilder.makeDateLiteral(new DateString(splits[0])); case DataType.TIMESTAMP: relDataType = rexBuilder.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP); - return rexBuilder.makeTimestampLiteral(new TimestampString(value), relDataType.getPrecision()); + // If the value with format yyyy-MM-dd, then pad with ` 00:00:00`, + // if with format `yyyy-MM-dd HH:mm:ss`, use this value directly, + // otherwise, wrong format, making literal will throw exception by Calcite + splits = StringUtils.split(value.trim(), " "); + String ts = splits.length == 1 ? 
value + " 00:00:00" : value; + return rexBuilder.makeTimestampLiteral(new TimestampString(ts), relDataType.getPrecision()); case DataType.VARCHAR: case DataType.STRING: relDataType = rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR, colType.getPrecision()); diff --git a/src/query-common/src/main/java/org/apache/kylin/util/FilterConditionExpander.java b/src/query-common/src/main/java/org/apache/kylin/util/FilterConditionExpander.java index c8087c6753..82c7993abd 100644 --- a/src/query-common/src/main/java/org/apache/kylin/util/FilterConditionExpander.java +++ b/src/query-common/src/main/java/org/apache/kylin/util/FilterConditionExpander.java @@ -22,6 +22,7 @@ import static org.apache.kylin.common.exception.QueryErrorCode.UNSUPPORTED_EXPRE import java.util.LinkedList; import java.util.List; +import java.util.Map; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; @@ -40,6 +41,7 @@ import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.util.NlsString; import org.apache.kylin.common.exception.KylinException; import org.apache.kylin.guava30.shaded.common.collect.Lists; +import org.apache.kylin.guava30.shaded.common.collect.Maps; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.query.relnode.ContextUtil; import org.apache.kylin.query.relnode.OlapContext; @@ -59,6 +61,8 @@ public class FilterConditionExpander { private final RelNode currentRel; private final RexBuilder rexBuilder; + private final Map<String, RexNode> cachedConvertedRelMap = Maps.newHashMap(); + public FilterConditionExpander(OlapContext context, RelNode currentRel) { this.context = context; this.currentRel = currentRel; @@ -145,8 +149,14 @@ public class FilterConditionExpander { } } - // col <op> lit - return simplify(call, lInputRef); + // col <op> lit, optimized with cache + if (cachedConvertedRelMap.containsKey(call.toString())) { + return cachedConvertedRelMap.get(call.toString()); + } else { + RexNode 
simplified = simplify(call, lInputRef); + cachedConvertedRelMap.put(call.toString(), simplified); // add cache + return simplified; + } } return null; diff --git a/src/query/src/main/java/org/apache/kylin/query/engine/PlannerFactory.java b/src/query/src/main/java/org/apache/kylin/query/engine/PlannerFactory.java index 6ed45272fd..22188602fe 100644 --- a/src/query/src/main/java/org/apache/kylin/query/engine/PlannerFactory.java +++ b/src/query/src/main/java/org/apache/kylin/query/engine/PlannerFactory.java @@ -53,7 +53,6 @@ import org.apache.calcite.rel.rules.ProjectMergeRule; import org.apache.calcite.rel.rules.ProjectRemoveRule; import org.apache.calcite.rel.rules.ProjectTableScanRule; import org.apache.calcite.rel.rules.ProjectWindowTransposeRule; -import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.rel.rules.SemiJoinRule; import org.apache.calcite.rel.rules.SortJoinTransposeRule; import org.apache.calcite.rel.rules.SortProjectTransposeRule; @@ -82,6 +81,7 @@ import org.apache.kylin.query.optrule.OlapModelViewRule; import org.apache.kylin.query.optrule.OlapProjectJoinTransposeRule; import org.apache.kylin.query.optrule.OlapProjectMergeRule; import org.apache.kylin.query.optrule.OlapProjectRule; +import org.apache.kylin.query.optrule.OlapReduceExpressionRule; import org.apache.kylin.query.optrule.OlapSortRule; import org.apache.kylin.query.optrule.OlapToEnumerableConverterRule; import org.apache.kylin.query.optrule.OlapUnionRule; @@ -183,15 +183,15 @@ public class PlannerFactory { // CalcitePrepareImpl.CONSTANT_REDUCTION_RULES if (kylinConfig.isReduceExpressionsRulesEnabled()) { - planner.addRule(ReduceExpressionsRule.PROJECT_INSTANCE); - planner.addRule(ReduceExpressionsRule.FILTER_INSTANCE); - planner.addRule(ReduceExpressionsRule.CALC_INSTANCE); - planner.addRule(ReduceExpressionsRule.JOIN_INSTANCE); + planner.addRule(OlapReduceExpressionRule.PROJECT_INSTANCE); + planner.addRule(OlapReduceExpressionRule.FILTER_INSTANCE); + 
planner.addRule(OlapReduceExpressionRule.JOIN_INSTANCE); + planner.addRule(OlapReduceExpressionRule.CALC_INSTANCE); } // the ValuesReduceRule breaks query test somehow... - // planner.addRule(ValuesReduceRule.FILTER_INSTANCE); - // planner.addRule(ValuesReduceRule.PROJECT_FILTER_INSTANCE); - // planner.addRule(ValuesReduceRule.PROJECT_INSTANCE); + // planner.addRule(ValuesReduceRule.FILTER_INSTANCE); + // planner.addRule(ValuesReduceRule.PROJECT_FILTER_INSTANCE); + // planner.addRule(ValuesReduceRule.PROJECT_INSTANCE); removeRules(planner, kylinConfig.getCalciteRemoveRule()); if (!kylinConfig.isEnumerableRulesEnabled()) { diff --git a/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java b/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java index 122a23b3db..a06f4ef07c 100644 --- a/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java +++ b/src/query/src/main/java/org/apache/kylin/query/engine/QueryExec.java @@ -69,6 +69,10 @@ import org.apache.kylin.query.engine.meta.SimpleDataContext; import org.apache.kylin.query.engine.view.ViewAnalyzer; import org.apache.kylin.query.mask.QueryResultMasks; import org.apache.kylin.query.optrule.OlapFilterJoinRule; +import org.apache.kylin.query.optrule.OlapFilterRule; +import org.apache.kylin.query.optrule.OlapProjectRule; +import org.apache.kylin.query.optrule.OlapReduceExpressionRule; +import org.apache.kylin.query.optrule.OlapValuesRule; import org.apache.kylin.query.optrule.SumConstantConvertRule; import org.apache.kylin.query.relnode.ContextUtil; import org.apache.kylin.query.relnode.OlapAggregateRel; @@ -269,6 +273,16 @@ public class QueryExec { Collection<RelOptRule> postOptRules = new LinkedHashSet<>(); // It will definitely work if it were put here postOptRules.add(SumConstantConvertRule.INSTANCE); + if (kylinConfig.isReduceExpressionsRulesEnabled()) { + // old calcite given wrong cost, therefore add FilterReduceExpressionsRule + // ignore ProjectReduceExpressionsRule for 
the function `concat` may give wrong result + // upgrade calcite to handle this problem? + // postOptRules.add(OlapReduceExpressionRule.PROJECT_INSTANCE) + postOptRules.add(OlapReduceExpressionRule.FILTER_INSTANCE); + postOptRules.add(OlapFilterRule.INSTANCE); + postOptRules.add(OlapProjectRule.INSTANCE); + postOptRules.add(OlapValuesRule.INSTANCE); + } if (kylinConfig.isConvertSumExpressionEnabled()) { postOptRules.addAll(HepUtils.SumExprRules); } diff --git a/src/query/src/main/java/org/apache/kylin/query/optrule/OlapProjectMergeRule.java b/src/query/src/main/java/org/apache/kylin/query/optrule/OlapProjectMergeRule.java index 478213654e..8e3ef392ab 100644 --- a/src/query/src/main/java/org/apache/kylin/query/optrule/OlapProjectMergeRule.java +++ b/src/query/src/main/java/org/apache/kylin/query/optrule/OlapProjectMergeRule.java @@ -18,6 +18,8 @@ package org.apache.kylin.query.optrule; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; @@ -27,9 +29,11 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.Permutation; @@ -80,7 +84,8 @@ public class OlapProjectMergeRule extends RelOptRule { //~ Methods ---------------------------------------------------------------- - @Override public boolean matches(RelOptRuleCall call) { + @Override + public boolean matches(RelOptRuleCall call) { final Project topProject = call.rel(0); final Project bottomProject = call.rel(1); return topProject.getConvention() == bottomProject.getConvention(); @@ -124,7 +129,7 @@ public class 
OlapProjectMergeRule extends RelOptRule { return; } - final List<RexNode> newProjects; + List<RexNode> newProjects; if (KylinConfig.getInstanceFromEnv().isProjectMergeWithBloatEnabled()) { newProjects = RelOptUtil.pushPastProjectUnlessBloat(topProject.getProjects(), bottomProject, KylinConfig.getInstanceFromEnv().getProjectMergeRuleBloatThreshold()); @@ -135,6 +140,7 @@ public class OlapProjectMergeRule extends RelOptRule { } else { newProjects = RelOptUtil.pushPastProject(topProject.getProjects(), bottomProject); } + newProjects = simplifyCast(newProjects, topProject.getCluster().getRexBuilder()); final RelNode input = bottomProject.getInput(); if (RexUtil.isIdentity(newProjects, input.getRowType()) && (force || input.getRowType().getFieldNames().equals(topProject.getRowType().getFieldNames()))) { @@ -148,6 +154,37 @@ public class OlapProjectMergeRule extends RelOptRule { call.transformTo(relBuilder.build()); } + public static List<RexNode> simplifyCast(List<RexNode> projects, RexBuilder rexBuilder) { + final List<RexNode> list = new ArrayList<>(); + for (RexNode rex : projects) { + if (rex.getKind() == SqlKind.CAST) { + RexNode inner = ((RexCall) rex).getOperands().get(0); + RexNode simplified = simplify(inner); + if (simplified.getType() == rex.getType()) { + list.add(simplified); + } else { + List<RexNode> newNodes = Collections.singletonList(simplified); + RexNode rexNode = rexBuilder.makeCall(rex.getType(), ((RexCall) rex).getOperator(), newNodes); + list.add(rexNode); + } + } else { + list.add(rex); + } + } + return list; + } + + private static RexNode simplify(RexNode node) { + RexNode current = node; + if (current.isA(SqlKind.CAST)) { + RexNode operand = ((RexCall) current).getOperands().get(0); + if (operand.getType().equals(current.getType())) { + current = simplify(operand); + } + } + return current; + } + private boolean containsNonMergeableExprs(Project project) { for (RexNode expr : project.getProjects()) { if (containsNonMergeableExprs(expr)) { diff 
--git a/src/query/src/main/java/org/apache/kylin/query/optrule/OlapReduceExpressionRule.java b/src/query/src/main/java/org/apache/kylin/query/optrule/OlapReduceExpressionRule.java new file mode 100644 index 0000000000..cedf0130bd --- /dev/null +++ b/src/query/src/main/java/org/apache/kylin/query/optrule/OlapReduceExpressionRule.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kylin.query.optrule; + +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.logical.LogicalCalc; +import org.apache.calcite.rel.rules.ReduceExpressionsRule; +import org.apache.kylin.query.relnode.OlapFilterRel; +import org.apache.kylin.query.relnode.OlapJoinRel; +import org.apache.kylin.query.relnode.OlapProjectRel; + +/** + * Override {@link ReduceExpressionsRule} so that it can reduce expressions of OlapRel + */ +public class OlapReduceExpressionRule { + private OlapReduceExpressionRule() { + } + + public static final ReduceExpressionsRule FILTER_INSTANCE = new ReduceExpressionsRule.FilterReduceExpressionsRule( + OlapFilterRel.class, true, RelFactories.LOGICAL_BUILDER); + + public static final ReduceExpressionsRule PROJECT_INSTANCE = new ReduceExpressionsRule.ProjectReduceExpressionsRule( + OlapProjectRel.class, true, RelFactories.LOGICAL_BUILDER); + + public static final ReduceExpressionsRule JOIN_INSTANCE = new ReduceExpressionsRule.JoinReduceExpressionsRule( + OlapJoinRel.class, true, RelFactories.LOGICAL_BUILDER); + + public static final ReduceExpressionsRule CALC_INSTANCE = new ReduceExpressionsRule.CalcReduceExpressionsRule( + LogicalCalc.class, true, RelFactories.LOGICAL_BUILDER); + +}