This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push: new f968e31 KYLIN-2620 Make the condition stricter to answer query with topN f968e31 is described below commit f968e31140e6b5e68ef3c8124192987752c32a03 Author: chao long <wayn...@qq.com> AuthorDate: Fri Mar 1 19:13:30 2019 +0800 KYLIN-2620 Make the condition stricter to answer query with topN --- .../java/org/apache/kylin/cube/CubeInstance.java | 3 +- .../apache/kylin/measure/topn/TopNMeasureType.java | 48 +++++++++++++++++++++- .../kylin/metadata/realization/SQLDigest.java | 4 +- .../apache/kylin/storage/hbase/ITStorageTest.java | 2 +- .../apache/kylin/query/relnode/OLAPContext.java | 3 +- .../apache/kylin/query/relnode/OLAPLimitRel.java | 1 + .../apache/kylin/query/relnode/OLAPSortRel.java | 19 +++++---- 7 files changed, 66 insertions(+), 14 deletions(-) diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java index 9c0f3e4..9c24a59 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java @@ -534,7 +534,8 @@ public class CubeInstance extends RootPersistentEntity implements IRealization, if (result.capable) { result.cost = getCost(digest); for (CapabilityInfluence i : result.influences) { - result.cost *= (i.suggestCostMultiplier() == 0) ? 1.0 : i.suggestCostMultiplier(); + double suggestCost = i.suggestCostMultiplier(); + result.cost *= (suggestCost == 0) ? 1.0 : suggestCost; } } else { result.cost = -1; diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java index d53a70a..194ba36 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.kylin.common.util.ByteArray; @@ -51,6 +52,8 @@ import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; +import static org.apache.kylin.metadata.realization.SQLDigest.OrderEnum.DESCENDING; + public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { private static final Logger logger = LoggerFactory.getLogger(TopNMeasureType.class); @@ -63,6 +66,8 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { public static final String CONFIG_AGG = "topn.aggregation"; public static final String CONFIG_ORDER = "topn.order"; + private boolean cuboidCanAnswer; + public static class Factory extends MeasureTypeFactory<TopNCounter<ByteArray>> { @Override @@ -257,6 +262,8 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { // TopN measure can (and only can) provide one numeric measure and one literal dimension // e.g. select seller, sum(gmv) from ... group by seller order by 2 desc limit 100 + cuboidCanAnswer = true; // true: have cuboid can answer query, false: no cuboid can answer query + List<TblColRef> literalCol = getTopNLiteralColumn(topN.getFunction()); for (TblColRef colRef : literalCol) { if (digest.filterColumns.contains(colRef) == true) { @@ -268,6 +275,12 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { if (digest.groupbyColumns.containsAll(literalCol) == false) return null; + List retainList = unmatchedDimensions.stream().filter(colRef -> literalCol.contains(colRef)).collect(Collectors.toList()); + + if (retainList.size() > 0){ + cuboidCanAnswer = false; + } + // check digest requires only one measure if (digest.aggregations.size() == 1) { @@ -278,10 +291,17 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { unmatchedDimensions.removeAll(literalCol); unmatchedAggregations.remove(onlyFunction); + return new CapabilityInfluence() { @Override public double suggestCostMultiplier() { - return 0.3; // make sure TopN get ahead of other matched realizations + if (totallyMatchTopN(digest)) { + return 0.3; // make sure TopN get ahead of other matched realizations + } else if (cuboidCanAnswer) { + return 1.3; // fuzzy topN match, but have cuboid can answer query + } else { + return 2; + } } @Override @@ -312,6 +332,25 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { return null; } + private boolean hasOneElement(List<? extends Object> list) { + return list != null && list.size() == 1; + } + + private boolean totallyMatchTopN(SQLDigest digest) { + boolean sortColumnMatch = false; + if (hasOneElement(digest.sortColumns)) { + TblColRef sortColumn = digest.sortColumns.get(0); + if (!digest.groupbyColumns.contains(sortColumn)) { + // only have one aggregation + sortColumnMatch = sortColumn.getColumnDesc().getZeroBasedIndex() == 0; + } + } + + return sortColumnMatch + && hasOneElement(digest.sortOrders) && DESCENDING.equals(digest.sortOrders.get(0)) + && digest.hasLimit; + } + private boolean isTopNCompatibleSum(FunctionDesc topN, FunctionDesc sum) { if (sum == null) return false; @@ -372,9 +411,16 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { continue; } + // topN not totally match, but have cuboid can answer, not use topN to adjust + // topN totally match or (topN fuzzy match, but no cuboid can answer), use topN to adjust + if (!totallyMatchTopN(sqlDigest) && cuboidCanAnswer) { + continue; + } + logger.info("Rewrite function " + origFunc + " to " + topnFunc); } + sqlDigest.aggregations = Lists.newArrayList(topnFunc); sqlDigest.groupbyColumns.removeAll(topnLiteralCol); sqlDigest.metricColumns.addAll(topnLiteralCol); diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java index fa7d1e5..78f0adc 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java @@ -81,6 +81,7 @@ public class SQLDigest { public List<OrderEnum> sortOrders; public boolean isRawQuery; public boolean limitPrecedesAggr; + public boolean hasLimit; public Set<MeasureDesc> involvedMeasure; @@ -91,7 +92,7 @@ public class SQLDigest { List<DynamicFunctionDesc> dynAggregations, // Set<TblColRef> rtDimensionColumns, Set<TblColRef> rtMetricColumns, // dynamic col related columns Set<TblColRef> filterColumns, TupleFilter filter, TupleFilter havingFilter, // filter - List<TblColRef> sortColumns, List<OrderEnum> sortOrders, boolean limitPrecedesAggr, // sort & limit + List<TblColRef> sortColumns, List<OrderEnum> sortOrders, boolean limitPrecedesAggr, boolean hasLimit, // sort & limit Set<MeasureDesc> involvedMeasure ) { this.factTable = factTable; @@ -121,6 +122,7 @@ public class SQLDigest { this.sortOrders = sortOrders; this.isRawQuery = isRawQuery(); this.limitPrecedesAggr = limitPrecedesAggr; + this.hasLimit = hasLimit; this.involvedMeasure = involvedMeasure; diff --git a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java index 3f8dccc..9022016 100644 --- a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java +++ b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java @@ -148,7 +148,7 @@ public class ITStorageTest extends HBaseMetadataTestCase { /*runtimeDimensionColumns*/ Collections.<TblColRef> emptySet(), // /*runtimeMetricColumns*/ Collections.<TblColRef> emptySet(), // /*filter col*/ Collections.<TblColRef> emptySet(), filter, null, // - /*sortCol*/ new ArrayList<TblColRef>(), new ArrayList<SQLDigest.OrderEnum>(), false, new HashSet<MeasureDesc>()); + /*sortCol*/ new ArrayList<TblColRef>(), new ArrayList<SQLDigest.OrderEnum>(), false, false, new HashSet<MeasureDesc>()); iterator = storageEngine.search(context, sqlDigest, mockup.newTupleInfo(groups, aggregations)); while (iterator.hasNext()) { ITuple tuple = iterator.next(); diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java index 340967c..b8ddd02 100755 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java @@ -132,6 +132,7 @@ public class OLAPContext { public boolean limitPrecedesAggr = false; public boolean afterJoin = false; public boolean hasJoin = false; + public boolean hasLimit = false; public boolean hasWindow = false; public boolean groupByExpression = false; // checkout if group by column has operator public boolean afterOuterAggregate = false; @@ -197,7 +198,7 @@ public class OLAPContext { metricsColumns, aggregations, aggrSqlCalls, dynFuncs, // aggregation rtDimColumns, rtMetricColumns, // runtime related columns filterColumns, filter, havingFilter, // filter - sortColumns, sortOrders, limitPrecedesAggr, // sort & limit + sortColumns, sortOrders, limitPrecedesAggr, hasLimit, // sort & limit involvedMeasure); } return sqlDigest; diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java index 1d0654c..8e04859 100755 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPLimitRel.java @@ -78,6 +78,7 @@ public class OLAPLimitRel extends SingleRel implements OLAPRel { this.columnRowType = buildColumnRowType(); this.context = implementor.getContext(); + this.context.hasLimit = true; // ignore limit after having clause // ignore limit after another limit, e.g. select A, count(*) from (select A,B from fact group by A,B limit 100) limit 10 diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java index 6432875..de05005 100644 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPSortRel.java @@ -73,6 +73,16 @@ public class OLAPSortRel extends Sort implements OLAPRel { this.context = implementor.getContext(); this.columnRowType = buildColumnRowType(); + + for (RelFieldCollation fieldCollation : this.collation.getFieldCollations()) { + int index = fieldCollation.getFieldIndex(); + SQLDigest.OrderEnum order = getOrderEnum(fieldCollation.getDirection()); + OLAPRel olapChild = (OLAPRel) this.getInput(); + TblColRef orderCol = olapChild.getColumnRowType().getAllColumns().get(index); + this.context.addSort(orderCol, order); + this.context.storageContext.markSort(); + + } } ColumnRowType buildColumnRowType() { @@ -90,15 +100,6 @@ public class OLAPSortRel extends Sort implements OLAPRel { if (this.context.realization == null) return; - for (RelFieldCollation fieldCollation : this.collation.getFieldCollations()) { - int index = fieldCollation.getFieldIndex(); - SQLDigest.OrderEnum order = getOrderEnum(fieldCollation.getDirection()); - OLAPRel olapChild = (OLAPRel) this.getInput(); - TblColRef orderCol = olapChild.getColumnRowType().getAllColumns().get(index); - this.context.addSort(orderCol, order); - this.context.storageContext.markSort(); - } - this.rowType = this.deriveRowType(); this.columnRowType = buildColumnRowType(); }