[kylin] branch master updated: KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not setting havingFilter correctly (#1529)

xxyu Sun, 03 Jan 2021 19:15:10 -0800

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git



The following commit(s) were added to refs/heads/master by this push:
     new 92e8d75  KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not 
setting havingFilter correctly (#1529)
92e8d75 is described below

commit 92e8d755c6efcb9a60ac1ae8d6f7863a5ae9d2b7
Author: kyotoYaho <nju_y...@apache.org>
AuthorDate: Mon Jan 4 11:14:56 2021 +0800

    KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not setting 
havingFilter correctly (#1529)
    
    * KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not setting 
havingFilter correctly
    
    * KYLIN-4682 Add test cases
---
 .../FactDistinctColumnsReducerMappingTest.java     |  3 +-
 .../localmeta/cube_desc/ci_left_join_cube.json     |  3 +-
 .../resources/query/sql_expression/query10.sql     | 26 ++++++++++
 .../kylin/query/relnode/OLAPAggregateRel.java      | 33 ++++++++----
 .../apache/kylin/query/relnode/OLAPFilterRel.java  | 60 ++++++++++++++++++++--
 .../jdbc/extensible/JdbcHiveMRInputTest.java       |  2 +-
 6 files changed, 109 insertions(+), 18 deletions(-)

diff --git 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java
 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java
index 9a00d55..8a32cf5 100644
--- 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java
+++ 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducerMappingTest.java
@@ -52,6 +52,7 @@ public class FactDistinctColumnsReducerMappingTest extends 
LocalFileMetadataTest
         CubeManager mgr = CubeManager.getInstance(getTestConfig());
         CubeInstance cube = mgr.getCube("ci_left_join_cube");
         TblColRef aUHC = 
cube.getModel().findColumn("TEST_COUNT_DISTINCT_BITMAP");
+        TblColRef shardByCol = cube.getModel().findColumn("SELLER_ID");
 
         FactDistinctColumnsReducerMapping mapping = new 
FactDistinctColumnsReducerMapping(cube);
         
@@ -73,7 +74,7 @@ public class FactDistinctColumnsReducerMappingTest extends 
LocalFileMetadataTest
         Assert.assertEquals(2, mapping.getReducerNumForDimCol(aUHC));
         int uhcReducerBegin = -1;
         for (int i = 0; i < dictEnd; i++) {
-            if (mapping.getColForReducer(i).equals(aUHC)) {
+            if (mapping.getColForReducer(i).equals(aUHC) || 
mapping.getColForReducer(i).equals(shardByCol)) {
                 uhcReducerBegin = i;
                 break;
             }
diff --git a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json 
b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
index cffce8a..60028dc 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json
@@ -363,7 +363,8 @@
     "rowkey_columns": [
       {
         "column": "TEST_KYLIN_FACT.SELLER_ID",
-        "encoding": "int:4"
+        "encoding": "int:4",
+        "isShardBy": true
       },
       {
         "column": "TEST_KYLIN_FACT.ORDER_ID",
diff --git a/kylin-it/src/test/resources/query/sql_expression/query10.sql 
b/kylin-it/src/test/resources/query/sql_expression/query10.sql
new file mode 100644
index 0000000..7ec0087
--- /dev/null
+++ b/kylin-it/src/test/resources/query/sql_expression/query10.sql
@@ -0,0 +1,26 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+SELECT SELLER_ID,
+    CASE WHEN LSTG_SITE_ID > 1 THEN LSTG_SITE_ID
+         ELSE LEAF_CATEG_ID
+    END AS dyna_GROUP,
+    SUM(PRICE)
+FROM TEST_KYLIN_FACT
+GROUP BY 1, 2
+HAVING SELLER_id is not null and SUM(PRICE)>10
\ No newline at end of file
diff --git 
a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java 
b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
index 8ec648c..19245b8 100755
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
@@ -26,6 +26,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.apache.calcite.adapter.enumerable.EnumerableAggregate;
 import org.apache.calcite.adapter.enumerable.EnumerableConvention;
@@ -131,7 +132,7 @@ public class OLAPAggregateRel extends Aggregate implements 
OLAPRel {
     private boolean afterAggregate;
     private Map<Integer, AggregateCall> hackAggCalls;
     private List<AggregateCall> rewriteAggCalls;
-    private List<TblColRef> groups;
+    private List<Set<TblColRef>> groups;
     private List<FunctionDesc> aggregations;
     private boolean rewriting;
 
@@ -199,7 +200,11 @@ public class OLAPAggregateRel extends Aggregate implements 
OLAPRel {
 
         // only translate the innermost aggregation
         if (!this.afterAggregate) {
-            addToContextGroupBy(this.groups);
+            List<TblColRef> colRefList = Lists.newArrayList();
+            for (Set<TblColRef> colRefSet : this.groups) {
+                colRefList.addAll(colRefSet);
+            }
+            addToContextGroupBy(colRefList);
             this.context.aggregations.addAll(this.aggregations);
             this.context.aggrOutCols
                     
.addAll(columnRowType.getAllColumns().subList(groups.size(), 
columnRowType.getAllColumns().size()));
@@ -224,14 +229,18 @@ public class OLAPAggregateRel extends Aggregate 
implements OLAPRel {
         buildAggregations();
 
         ColumnRowType inputColumnRowType = ((OLAPRel) 
getInput()).getColumnRowType();
-        List<TblColRef> columns = new 
ArrayList<TblColRef>(this.rowType.getFieldCount());
-        columns.addAll(this.groups);
+        List<TblColRef> columns = new 
ArrayList<>(this.rowType.getFieldCount());
+        for (Set<TblColRef> groupCols : groups) {
+            String name = 
groupCols.stream().map(TblColRef::getName).collect(Collectors.toList()).toString();
+            TblColRef groupInnerCol = TblColRef.newInnerColumn(name, 
TblColRef.InnerDataTypeEnum.LITERAL);
+            columns.add(groupInnerCol);
+        }
 
         // Add group column indicators
         if (indicator) {
             final Set<String> containedNames = Sets.newHashSet();
-            for (TblColRef groupCol : groups) {
-                String base = "i$" + groupCol.getName();
+            for (Set<TblColRef> groupCols : groups) {
+                String base = "i$" + 
groupCols.stream().map(TblColRef::getName).collect(Collectors.toList());
                 String name = base;
                 int i = 0;
                 while (containedNames.contains(name)) {
@@ -286,7 +295,7 @@ public class OLAPAggregateRel extends Aggregate implements 
OLAPRel {
 
             TblColRef groupOutCol = inputColumnRowType.getColumnByIndex(i);
             if (tupleExpression instanceof ColumnTupleExpression) {
-                this.groups.add(((ColumnTupleExpression) 
tupleExpression).getColumn());
+                this.groups.add(Sets.newHashSet(((ColumnTupleExpression) 
tupleExpression).getColumn()));
             } else if (this.context.isDynamicColumnEnabled() && 
tupleExpression.ifForDynamicColumn()) {
                 Pair<Set<TblColRef>, Set<TblColRef>> cols = 
ExpressionColCollector.collectColumnsPair(tupleExpression);
 
@@ -309,11 +318,13 @@ public class OLAPAggregateRel extends Aggregate 
implements OLAPRel {
                 }
 
                 if (ifPushDown) {
-                    this.groups.add(groupOutCol);
+                    this.groups.add(Sets.newHashSet(groupOutCol));
                     this.context.dynGroupBy.put(groupOutCol, tupleExpression);
                 } else {
-                    this.groups.addAll(cols.getFirst());
-                    this.groups.addAll(cols.getSecond());
+                    Set<TblColRef> srcCols = Sets.newHashSet();
+                    srcCols.addAll(cols.getFirst());
+                    srcCols.addAll(cols.getSecond());
+                    this.groups.add(srcCols);
                     this.context.dynamicFields.remove(groupOutCol);
                 }
             } else {
@@ -322,7 +333,7 @@ public class OLAPAggregateRel extends Aggregate implements 
OLAPRel {
                 if (srcCols.isEmpty()) {
                     srcCols.add(groupOutCol);
                 }
-                this.groups.addAll(srcCols);
+                this.groups.add(srcCols);
             }
         }
     }
diff --git 
a/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java 
b/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java
index bfd6c4d..8f437c6 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPFilterRel.java
@@ -21,16 +21,19 @@ package org.apache.kylin.query.relnode;
 import java.util.List;
 import java.util.Set;
 
+import com.google.common.collect.Lists;
 import org.apache.calcite.adapter.enumerable.EnumerableCalc;
 import org.apache.calcite.adapter.enumerable.EnumerableConvention;
 import org.apache.calcite.adapter.enumerable.EnumerableRel;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptCost;
 import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.RelTrait;
 import org.apache.calcite.plan.RelTraitSet;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelWriter;
+import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
@@ -38,6 +41,8 @@ import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexProgram;
 import org.apache.calcite.rex.RexProgramBuilder;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.metadata.filter.FilterOptimizeTransformer;
 import org.apache.kylin.metadata.filter.LogicalTupleFilter;
@@ -88,11 +93,58 @@ public class OLAPFilterRel extends Filter implements 
OLAPRel {
         } else {
             context.afterHavingClauseFilter = true;
 
-            TupleFilterVisitor visitor = new 
TupleFilterVisitor(this.columnRowType);
-            TupleFilter havingFilter = this.condition.accept(visitor);
-            if (context.havingFilter == null)
-                context.havingFilter = havingFilter;
+            if (context.havingFilter == null) {
+                TupleFilterVisitor visitor = new 
TupleFilterVisitor(this.columnRowType);
+                RexNode condition = getHavingFilterCondition();
+                if (condition != null) {
+                    context.havingFilter = condition.accept(visitor);
+                }
+            }
+        }
+    }
+
+    // In case that there's (is not null) for some dimension in the having 
filter,
+    //      which may not utilize the FilterAggregateTransposeRule to do 
filter decomposition,
+    // we need to extract filters on aggregations here which may be used in 
GTCubeStorageQueryBase.
+    // Otherwise, the logic in GTCubeStorageQueryBase may not be correct and 
may throw exceptions
+    private RexNode getHavingFilterCondition() {
+        if (!(getInput() instanceof OLAPAggregateRel)) {
+            return this.condition;
+        }
+
+        List<RexNode> remainingConditions = Lists.newArrayList();
+
+        OLAPAggregateRel aggRel = (OLAPAggregateRel) getInput();
+        final List<RexNode> conditions = 
RelOptUtil.conjunctions(this.condition);
+        for (RexNode condition : conditions) {
+            ImmutableBitSet rCols = RelOptUtil.InputFinder.bits(condition);
+            if (!canPush(aggRel, rCols)) {
+                remainingConditions.add(condition);
+            }
+        }
+
+        return remainingConditions.isEmpty() ? null
+                : RexUtil.composeDisjunction(getCluster().getRexBuilder(), 
remainingConditions);
+    }
+
+    private boolean canPush(OLAPAggregateRel aggregate, ImmutableBitSet rCols) 
{
+        // If the filter references columns not in the group key, we cannot 
push
+        final ImmutableBitSet groupKeys = ImmutableBitSet.range(0, 
aggregate.getGroupSet().cardinality());
+        if (!groupKeys.contains(rCols)) {
+            return false;
+        }
+
+        if (aggregate.getGroupType() != Aggregate.Group.SIMPLE) {
+            // If grouping sets are used, the filter can be pushed if
+            // the columns referenced in the predicate are present in
+            // all the grouping sets.
+            for (ImmutableBitSet groupingSet : aggregate.getGroupSets()) {
+                if (!groupingSet.contains(rCols)) {
+                    return false;
+                }
+            }
         }
+        return true;
     }
 
     ColumnRowType buildColumnRowType() {
diff --git 
a/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java
 
b/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java
index 0f6f3ff..a10fb55 100644
--- 
a/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java
+++ 
b/source-jdbc/src/test/java/org/apache/kylin/source/jdbc/extensible/JdbcHiveMRInputTest.java
@@ -95,7 +95,7 @@ public class JdbcHiveMRInputTest extends TestBase {
         String cmd = executable.getParam("cmd");
         Assert.assertTrue(cmd.contains("org.h2.Driver"));
         Assert.assertTrue(
-                cmd.contains("--boundary-query \"SELECT 
MIN(\\\"TEST_KYLIN_FACT\\\".\\\"CAL_DT\\\"), 
MAX(\\\"TEST_KYLIN_FACT\\\".\\\"CAL_DT\\\")" + System.lineSeparator()
+                cmd.contains("--boundary-query \"SELECT 
MIN(\\\"TEST_KYLIN_FACT\\\".\\\"SELLER_ID\\\"), 
MAX(\\\"TEST_KYLIN_FACT\\\".\\\"SELLER_ID\\\")" + System.lineSeparator()
                         + "FROM \\\"DEFAULT\\\".\\\"TEST_KYLIN_FACT\\\" AS 
\\\"TEST_KYLIN_FACT\\\"\""));
         source.close();
     }

[kylin] branch master updated: KYLIN-4682 Fix java.lang.IndexOutOfBoundsException due to not setting havingFilter correctly (#1529)

Reply via email to