This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push: new 92e8d75 KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not setting havingFilter correctly (#1529) 92e8d75 is described below commit 92e8d755c6efcb9a60ac1ae8d6f7863a5ae9d2b7 Author: kyotoYaho <nju_y...@apache.org> AuthorDate: Mon Jan 4 11:14:56 2021 +0800 KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not setting havingFilter correctly (#1529) * KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not setting havingFilter correctly * KYLIN-4682 Add test cases --- .../FactDistinctColumnsReducerMappingTest.java | 3 +- .../localmeta/cube_desc/ci_left_join_cube.json | 3 +- .../resources/query/sql_expression/query10.sql | 26 ++++++++++ .../kylin/query/relnode/OLAPAggregateRel.java | 33 ++++++++---- .../apache/kylin/query/relnode/OLAPFilterRel.java | 60 ++++++++++++++++++++-- .../jdbc/extensible/JdbcHiveMRInputTest.java | 2 +- 6 files changed, 109 insertions(+), 18 deletions(-) diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java index 9a00d55..8a32cf5 100644 --- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java +++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java @@ -52,6 +52,7 @@ public class FactDistinctColumnsReducerMappingTest extends LocalFileMetadataTest CubeManager mgr = CubeManager.getInstance(getTestConfig()); CubeInstance cube = mgr.getCube("ci_left_join_cube"); TblColRef aUHC = cube.getModel().findColumn("TEST_COUNT_DISTINCT_BITMAP"); + TblColRef shardByCol = cube.getModel().findColumn("SELLER_ID"); FactDistinctColumnsReducerMapping mapping = new FactDistinctColumnsReducerMapping(cube); @@ -73,7 +74,7 @@ public class FactDistinctColumnsReducerMappingTest extends LocalFileMetadataTest Assert.assertEquals(2, mapping.getReducerNumForDimCol(aUHC)); int uhcReducerBegin = -1; for (int i = 0; i < dictEnd; i++) { - if (mapping.getColForReducer(i).equals(aUHC)) { + if (mapping.getColForReducer(i).equals(aUHC) || mapping.getColForReducer(i).equals(shardByCol)) { uhcReducerBegin = i; break; } diff --git a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json index cffce8a..60028dc 100644 --- a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json +++ b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json @@ -363,7 +363,8 @@ "rowkey_columns": [ { "column": "TEST_KYLIN_FACT.SELLER_ID", - "encoding": "int:4" + "encoding": "int:4", + "isShardBy": true }, { "column": "TEST_KYLIN_FACT.ORDER_ID", diff --git a/kylin-it/src/test/resources/query/sql_expression/query10.sql b/kylin-it/src/test/resources/query/sql_expression/query10.sql new file mode 100644 index 0000000..7ec0087 --- /dev/null +++ b/kylin-it/src/test/resources/query/sql_expression/query10.sql @@ -0,0 +1,26 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +SELECT SELLER_ID, + CASE WHEN LSTG_SITE_ID > 1 THEN LSTG_SITE_ID + ELSE LEAF_CATEG_ID + END AS dyna_GROUP, + SUM(PRICE) +FROM TEST_KYLIN_FACT +GROUP BY 1, 2 +HAVING SELLER_id is not null and SUM(PRICE)>10 \ No newline at end of file diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java index 8ec648c..19245b8 100755 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java @@ -26,6 +26,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import org.apache.calcite.adapter.enumerable.EnumerableAggregate; import org.apache.calcite.adapter.enumerable.EnumerableConvention; @@ -131,7 +132,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel { private boolean afterAggregate; private Map<Integer, AggregateCall> hackAggCalls; private List<AggregateCall> rewriteAggCalls; - private List<TblColRef> groups; + private List<Set<TblColRef>> groups; private List<FunctionDesc> aggregations; private boolean rewriting; @@ -199,7 +200,11 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel { // only translate the innermost aggregation if (!this.afterAggregate) { - addToContextGroupBy(this.groups); + List<TblColRef> colRefList = Lists.newArrayList(); + for (Set<TblColRef> colRefSet : this.groups) { + colRefList.addAll(colRefSet); + } + addToContextGroupBy(colRefList); this.context.aggregations.addAll(this.aggregations); this.context.aggrOutCols .addAll(columnRowType.getAllColumns().subList(groups.size(), columnRowType.getAllColumns().size())); @@ -224,14 +229,18 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel { buildAggregations(); ColumnRowType inputColumnRowType = ((OLAPRel) getInput()).getColumnRowType(); - List<TblColRef> columns = new ArrayList<TblColRef>(this.rowType.getFieldCount()); - columns.addAll(this.groups); + List<TblColRef> columns = new ArrayList<>(this.rowType.getFieldCount()); + for (Set<TblColRef> groupCols : groups) { + String name = groupCols.stream().map(TblColRef::getName).collect(Collectors.toList()).toString(); + TblColRef groupInnerCol = TblColRef.newInnerColumn(name, TblColRef.InnerDataTypeEnum.LITERAL); + columns.add(groupInnerCol); + } // Add group column indicators if (indicator) { final Set<String> containedNames = Sets.newHashSet(); - for (TblColRef groupCol : groups) { - String base = "i$" + groupCol.getName(); + for (Set<TblColRef> groupCols : groups) { + String base = "i$" + groupCols.stream().map(TblColRef::getName).collect(Collectors.toList()); String name = base; int i = 0; while (containedNames.contains(name)) { @@ -286,7 +295,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel { TblColRef groupOutCol = inputColumnRowType.getColumnByIndex(i); if (tupleExpression instanceof ColumnTupleExpression) { - this.groups.add(((ColumnTupleExpression) tupleExpression).getColumn()); + this.groups.add(Sets.newHashSet(((ColumnTupleExpression) tupleExpression).getColumn())); } else if (this.context.isDynamicColumnEnabled() && tupleExpression.ifForDynamicColumn()) { Pair<Set<TblColRef>, Set<TblColRef>> cols = ExpressionColCollector.collectColumnsPair(tupleExpression); @@ -309,11 +318,13 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel { } if (ifPushDown) { - this.groups.add(groupOutCol); + this.groups.add(Sets.newHashSet(groupOutCol)); this.context.dynGroupBy.put(groupOutCol, tupleExpression); } else { - this.groups.addAll(cols.getFirst()); - this.groups.addAll(cols.getSecond()); + Set<TblColRef> srcCols = Sets.newHashSet(); + srcCols.addAll(cols.getFirst()); + srcCols.addAll(cols.getSecond()); + this.groups.add(srcCols); this.context.dynamicFields.remove(groupOutCol); } } else { @@ -322,7 +333,7 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel { if (srcCols.isEmpty()) { srcCols.add(groupOutCol); } - this.groups.addAll(srcCols); + this.groups.add(srcCols); } } } diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java index bfd6c4d..8f437c6 100644 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java @@ -21,16 +21,19 @@ package org.apache.kylin.query.relnode; import java.util.List; import java.util.Set; +import com.google.common.collect.Lists; import org.apache.calcite.adapter.enumerable.EnumerableCalc; import org.apache.calcite.adapter.enumerable.EnumerableConvention; import org.apache.calcite.adapter.enumerable.EnumerableRel; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelTrait; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; @@ -38,6 +41,8 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexProgram; import org.apache.calcite.rex.RexProgramBuilder; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.metadata.filter.FilterOptimizeTransformer; import org.apache.kylin.metadata.filter.LogicalTupleFilter; @@ -88,11 +93,58 @@ public class OLAPFilterRel extends Filter implements OLAPRel { } else { context.afterHavingClauseFilter = true; - TupleFilterVisitor visitor = new TupleFilterVisitor(this.columnRowType); - TupleFilter havingFilter = this.condition.accept(visitor); - if (context.havingFilter == null) - context.havingFilter = havingFilter; + if (context.havingFilter == null) { + TupleFilterVisitor visitor = new TupleFilterVisitor(this.columnRowType); + RexNode condition = getHavingFilterCondition(); + if (condition != null) { + context.havingFilter = condition.accept(visitor); + } + } + } + } + + // In case that there's (is not null) for some dimension in the having filter, + // which may not utilize the FilterAggregateTransposeRule to do filter decomposition, + // we need to extract filters on aggregations here which may be used in GTCubeStorageQueryBase. + // Otherwise, the logic in GTCubeStorageQueryBase may not be correct and may throw exceptions + private RexNode getHavingFilterCondition() { + if (!(getInput() instanceof OLAPAggregateRel)) { + return this.condition; + } + + List<RexNode> remainingConditions = Lists.newArrayList(); + + OLAPAggregateRel aggRel = (OLAPAggregateRel) getInput(); + final List<RexNode> conditions = RelOptUtil.conjunctions(this.condition); + for (RexNode condition : conditions) { + ImmutableBitSet rCols = RelOptUtil.InputFinder.bits(condition); + if (!canPush(aggRel, rCols)) { + remainingConditions.add(condition); + } + } + + return remainingConditions.isEmpty() ? null + : RexUtil.composeDisjunction(getCluster().getRexBuilder(), remainingConditions); + } + + private boolean canPush(OLAPAggregateRel aggregate, ImmutableBitSet rCols) { + // If the filter references columns not in the group key, we cannot push + final ImmutableBitSet groupKeys = ImmutableBitSet.range(0, aggregate.getGroupSet().cardinality()); + if (!groupKeys.contains(rCols)) { + return false; + } + + if (aggregate.getGroupType() != Aggregate.Group.SIMPLE) { + // If grouping sets are used, the filter can be pushed if + // the columns referenced in the predicate are present in + // all the grouping sets. + for (ImmutableBitSet groupingSet : aggregate.getGroupSets()) { + if (!groupingSet.contains(rCols)) { + return false; + } + } } + return true; } ColumnRowType buildColumnRowType() { diff --git a/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java b/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java index 0f6f3ff..a10fb55 100644 --- a/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java +++ b/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java @@ -95,7 +95,7 @@ public class JdbcHiveMRInputTest extends TestBase { String cmd = executable.getParam("cmd"); Assert.assertTrue(cmd.contains("org.h2.Driver")); Assert.assertTrue( - cmd.contains("--boundary-query \"SELECT MIN(\\\"TEST_KYLIN_FACT\\\".\\\"CAL_DT\\\"), MAX(\\\"TEST_KYLIN_FACT\\\".\\\"CAL_DT\\\")" + System.lineSeparator() + cmd.contains("--boundary-query \"SELECT MIN(\\\"TEST_KYLIN_FACT\\\".\\\"SELLER_ID\\\"), MAX(\\\"TEST_KYLIN_FACT\\\".\\\"SELLER_ID\\\")" + System.lineSeparator() + "FROM \\\"DEFAULT\\\".\\\"TEST_KYLIN_FACT\\\" AS \\\"TEST_KYLIN_FACT\\\"\"")); source.close(); }