KYLIN-1979 Move hackNoGroupByAggregation to cube-based storage implementations
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ae9d7479 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ae9d7479 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ae9d7479 Branch: refs/heads/1.5.x-CDH5.7 Commit: ae9d747904d02f89a4d6616ca201243afbc075cc Parents: 46be805 Author: Hongbin Ma <mahong...@apache.org> Authored: Sun Aug 28 23:19:06 2016 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Sun Aug 28 23:19:06 2016 +0800 ---------------------------------------------------------------------- .../kylin/cube/CubeCapabilityChecker.java | 2 +- .../apache/kylin/cube/RawQueryLastHacker.java | 82 ++++++++++++++++++++ .../org/apache/kylin/measure/MeasureType.java | 2 +- .../dim/DimCountDistinctMeasureType.java | 10 ++- .../ExtendedColumnMeasureType.java | 26 ++++--- .../kylin/measure/raw/RawMeasureType.java | 47 +++++------ .../kylin/measure/topn/TopNMeasureType.java | 38 ++++----- .../kylin/metadata/realization/SQLDigest.java | 5 +- .../gtrecord/GTCubeStorageQueryBase.java | 23 +++++- .../kylin/query/enumerator/OLAPEnumerator.java | 63 --------------- .../storage/hbase/cube/v1/CubeStorageQuery.java | 23 +++++- 11 files changed, 195 insertions(+), 126 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java index e0d8dd3..79d1e3b 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeCapabilityChecker.java @@ -88,7 +88,7 @@ public class CubeCapabilityChecker { return result; } - if (digest.isRawQuery() && cube.getFactTable().equals(digest.factTable)) { + if (digest.isRawQuery && cube.getFactTable().equals(digest.factTable)) { result.influences.add(new CapabilityInfluence() { @Override public double suggestCostMultiplier() { http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-cube/src/main/java/org/apache/kylin/cube/RawQueryLastHacker.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/RawQueryLastHacker.java b/core-cube/src/main/java/org/apache/kylin/cube/RawQueryLastHacker.java new file mode 100644 index 0000000..63ddac5 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/RawQueryLastHacker.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.cube; + +import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.ParameterDesc; +import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.metadata.realization.SQLDigest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RawQueryLastHacker { + + private static final Logger logger = LoggerFactory.getLogger(RawQueryLastHacker.class); + + public static void hackNoAggregations(SQLDigest sqlDigest, CubeDesc cubeDesc) { + if (!sqlDigest.isRawQuery) { + return; + } + + // If no group by and metric found, then it's simple query like select ... from ... where ..., + // But we have no raw data stored, in order to return better results, we hack to output sum of metric column + logger.info("No group by and aggregation found in this query, will hack some result for better look of output..."); + + // If it's select * from ..., + // We need to retrieve cube to manually add columns into sqlDigest, so that we have full-columns results as output. + boolean isSelectAll = sqlDigest.allColumns.isEmpty() || sqlDigest.allColumns.equals(sqlDigest.filterColumns); + for (TblColRef col : cubeDesc.listAllColumns()) { + if (col.getTable().equals(sqlDigest.factTable) && (cubeDesc.listDimensionColumnsIncludingDerived().contains(col) || isSelectAll)) { + sqlDigest.allColumns.add(col); + } + } + + for (TblColRef col : sqlDigest.allColumns) { + if (cubeDesc.listDimensionColumnsIncludingDerived().contains(col)) { + // For dimension columns, take them as group by columns. + sqlDigest.groupbyColumns.add(col); + } else { + // For measure columns, take them as metric columns with aggregation function SUM(). + ParameterDesc colParameter = new ParameterDesc(); + colParameter.setType("column"); + colParameter.setValue(col.getName()); + FunctionDesc sumFunc = new FunctionDesc(); + sumFunc.setExpression("SUM"); + sumFunc.setParameter(colParameter); + + boolean measureHasSum = false; + for (MeasureDesc colMeasureDesc : cubeDesc.getMeasures()) { + if (colMeasureDesc.getFunction().equals(sumFunc)) { + measureHasSum = true; + break; + } + } + if (measureHasSum) { + sqlDigest.aggregations.add(sumFunc); + } else { + logger.warn("SUM is not defined for measure column " + col + ", output will be meaningless."); + } + + sqlDigest.metricColumns.add(col); + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java index 6786f4b..82618e9 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/MeasureType.java @@ -124,7 +124,7 @@ abstract public class MeasureType<T> { * They need to adjust dimensions and measures in <code>sqlDigest</code> before scanning, * such that correct cuboid and measures can be selected by storage. */ - public void adjustSqlDigest(MeasureDesc measureDesc, SQLDigest sqlDigest) { + public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) { } /** Return true if one storage record maps to multiple tuples, or false otherwise. */ http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-metadata/src/main/java/org/apache/kylin/measure/dim/DimCountDistinctMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/dim/DimCountDistinctMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/dim/DimCountDistinctMeasureType.java index 7e4e529..9fe1075 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/dim/DimCountDistinctMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/dim/DimCountDistinctMeasureType.java @@ -18,6 +18,8 @@ package org.apache.kylin.measure.dim; +import java.util.List; + import org.apache.kylin.measure.MeasureAggregator; import org.apache.kylin.measure.MeasureIngester; import org.apache.kylin.measure.MeasureType; @@ -79,8 +81,10 @@ public class DimCountDistinctMeasureType extends MeasureType<Object> { return DimCountDistinctAggFunc.class; } - public void adjustSqlDigest(MeasureDesc measureDesc, SQLDigest sqlDigest) { - sqlDigest.groupbyColumns.addAll(measureDesc.getFunction().getParameter().getColRefs()); - sqlDigest.aggregations.remove(measureDesc.getFunction()); + public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) { + for (MeasureDesc measureDesc : measureDescs) { + sqlDigest.groupbyColumns.addAll(measureDesc.getFunction().getParameter().getColRefs()); + sqlDigest.aggregations.remove(measureDesc.getFunction()); + } } } http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java index dc718d1..796f1f7 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/extendedcolumn/ExtendedColumnMeasureType.java @@ -93,19 +93,21 @@ public class ExtendedColumnMeasureType extends MeasureType<ByteArray> { } @Override - public void adjustSqlDigest(MeasureDesc measureDesc, SQLDigest sqlDigest) { - FunctionDesc extendColumnFunc = measureDesc.getFunction(); - List<TblColRef> hosts = getExtendedColumnHosts(extendColumnFunc); - TblColRef extended = getExtendedColumn(extendColumnFunc); + public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) { + for (MeasureDesc measureDesc : measureDescs) { + FunctionDesc extendColumnFunc = measureDesc.getFunction(); + List<TblColRef> hosts = getExtendedColumnHosts(extendColumnFunc); + TblColRef extended = getExtendedColumn(extendColumnFunc); + + if (!sqlDigest.groupbyColumns.contains(extended)) { + return; + } - if (!sqlDigest.groupbyColumns.contains(extended)) { - return; + sqlDigest.aggregations.add(extendColumnFunc); + sqlDigest.groupbyColumns.remove(extended); + sqlDigest.groupbyColumns.addAll(hosts); + sqlDigest.metricColumns.add(extended); } - - sqlDigest.aggregations.add(extendColumnFunc); - sqlDigest.groupbyColumns.remove(extended); - sqlDigest.groupbyColumns.addAll(hosts); - sqlDigest.metricColumns.add(extended); } @Override @@ -151,7 +153,7 @@ public class ExtendedColumnMeasureType extends MeasureType<ByteArray> { value = null; return; } - + ByteArray byteArray = (ByteArray) measureValue; //the array in ByteArray is guaranteed to be completed owned by the ByteArray value = Bytes.toString(byteArray.array()); http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-metadata/src/main/java/org/apache/kylin/measure/raw/RawMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/raw/RawMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/raw/RawMeasureType.java index aa46149..50715ec 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/raw/RawMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/raw/RawMeasureType.java @@ -166,7 +166,7 @@ public class RawMeasureType extends MeasureType<List<ByteArray>> { public CapabilityResult.CapabilityInfluence influenceCapabilityCheck(Collection<TblColRef> unmatchedDimensions, Collection<FunctionDesc> unmatchedAggregations, SQLDigest digest, MeasureDesc measureDesc) { //is raw query - if (!digest.isRawQuery()) + if (!digest.isRawQuery) return null; TblColRef rawColumn = getRawColumn(measureDesc.getFunction()); @@ -196,27 +196,30 @@ public class RawMeasureType extends MeasureType<List<ByteArray>> { } @Override - public void adjustSqlDigest(MeasureDesc measureDesc, SQLDigest sqlDigest) { - if (sqlDigest.isRawQuery()) { - TblColRef col = this.getRawColumn(measureDesc.getFunction()); - ParameterDesc colParameter = new ParameterDesc(); - colParameter.setType("column"); - colParameter.setValue(col.getName()); - FunctionDesc rawFunc = new FunctionDesc(); - rawFunc.setExpression("RAW"); - rawFunc.setParameter(colParameter); - - if (sqlDigest.allColumns.contains(col)) { - if (measureDesc.getFunction().equals(rawFunc)) { - FunctionDesc sumFunc = new FunctionDesc(); - sumFunc.setExpression("SUM"); - sumFunc.setParameter(colParameter); - sqlDigest.aggregations.remove(sumFunc); - sqlDigest.aggregations.add(rawFunc); - logger.info("Add RAW measure on column " + col); - } - if (!sqlDigest.metricColumns.contains(col)) { - sqlDigest.metricColumns.add(col); + public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) { + + if (sqlDigest.isRawQuery) { + for (MeasureDesc measureDesc : measureDescs) { + TblColRef col = this.getRawColumn(measureDesc.getFunction()); + ParameterDesc colParameter = new ParameterDesc(); + colParameter.setType("column"); + colParameter.setValue(col.getName()); + FunctionDesc rawFunc = new FunctionDesc(); + rawFunc.setExpression("RAW"); + rawFunc.setParameter(colParameter); + + if (sqlDigest.allColumns.contains(col)) { + if (measureDesc.getFunction().equals(rawFunc)) { + FunctionDesc sumFunc = new FunctionDesc(); + sumFunc.setExpression("SUM"); + sumFunc.setParameter(colParameter); + sqlDigest.aggregations.remove(sumFunc); + sqlDigest.aggregations.add(rawFunc); + logger.info("Add RAW measure on column " + col); + } + if (!sqlDigest.metricColumns.contains(col)) { + sqlDigest.metricColumns.add(col); + } } } } http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java index ad42824..ed22d61 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNMeasureType.java @@ -39,8 +39,8 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.apache.kylin.metadata.model.TblColRef; -import org.apache.kylin.metadata.realization.CapabilityResult.CapabilityInfluence; import org.apache.kylin.metadata.realization.SQLDigest; +import org.apache.kylin.metadata.realization.CapabilityResult.CapabilityInfluence; import org.apache.kylin.metadata.tuple.Tuple; import org.apache.kylin.metadata.tuple.TupleInfo; import org.slf4j.Logger; @@ -290,28 +290,30 @@ public class TopNMeasureType extends MeasureType<TopNCounter<ByteArray>> { } @Override - public void adjustSqlDigest(MeasureDesc measureDesc, SQLDigest sqlDigest) { - FunctionDesc topnFunc = measureDesc.getFunction(); - List<TblColRef> topnLiteralCol = getTopNLiteralColumn(topnFunc); + public void adjustSqlDigest(List<MeasureDesc> measureDescs, SQLDigest sqlDigest) { + for (MeasureDesc measureDesc : measureDescs) { + FunctionDesc topnFunc = measureDesc.getFunction(); + List<TblColRef> topnLiteralCol = getTopNLiteralColumn(topnFunc); - if (sqlDigest.groupbyColumns.containsAll(topnLiteralCol) == false) - return; + if (sqlDigest.groupbyColumns.containsAll(topnLiteralCol) == false) + return; - if (sqlDigest.aggregations.size() > 1) { - throw new IllegalStateException("When query with topN, only one metrics is allowed."); - } + if (sqlDigest.aggregations.size() > 1) { + throw new IllegalStateException("When query with topN, only one metrics is allowed."); + } - if (sqlDigest.aggregations.size() > 0) { - FunctionDesc origFunc = sqlDigest.aggregations.iterator().next(); - if (origFunc.isSum() == false && origFunc.isCount() == false) { - throw new IllegalStateException("When query with topN, only SUM function is allowed."); + if (sqlDigest.aggregations.size() > 0) { + FunctionDesc origFunc = sqlDigest.aggregations.iterator().next(); + if (origFunc.isSum() == false && origFunc.isCount() == false) { + throw new IllegalStateException("When query with topN, only SUM function is allowed."); + } + logger.info("Rewrite function " + origFunc + " to " + topnFunc); } - logger.info("Rewrite function " + origFunc + " to " + topnFunc); - } - sqlDigest.aggregations = Lists.newArrayList(topnFunc); - sqlDigest.groupbyColumns.removeAll(topnLiteralCol); - sqlDigest.metricColumns.addAll(topnLiteralCol); + sqlDigest.aggregations = Lists.newArrayList(topnFunc); + sqlDigest.groupbyColumns.removeAll(topnLiteralCol); + sqlDigest.metricColumns.addAll(topnLiteralCol); + } } @Override http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java index 1eee1e7..d2bba66 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java @@ -44,7 +44,9 @@ public class SQLDigest { public Collection<FunctionDesc> aggregations; public Collection<MeasureDesc> sortMeasures; public Collection<OrderEnum> sortOrders; + public boolean isRawQuery; + //initialized when org.apache.kylin.query.routing.QueryRouter.selectRealization() public SQLDigest(String factTable, TupleFilter filter, Collection<JoinDesc> joinDescs, Collection<TblColRef> allColumns, // Collection<TblColRef> groupbyColumns, Collection<TblColRef> filterColumns, Collection<TblColRef> aggregatedColumns, Collection<FunctionDesc> aggregateFunnc, Collection<MeasureDesc> sortMeasures, Collection<OrderEnum> sortOrders) { this.factTable = factTable; @@ -57,9 +59,10 @@ public class SQLDigest { this.aggregations = aggregateFunnc; this.sortMeasures = sortMeasures; this.sortOrders = sortOrders; + this.isRawQuery = isRawQuery(); } - public boolean isRawQuery() { + private boolean isRawQuery() { return this.groupbyColumns.isEmpty() && // select a group by a -> not raw this.aggregations.isEmpty(); // has aggr -> not raw //the reason to choose aggregations rather than metricColumns is because the former is set earlier at implOLAP http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java ---------------------------------------------------------------------- diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java index bacd293..86346f8 100644 --- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java +++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java @@ -22,12 +22,14 @@ import java.util.Collection; import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.cube.RawQueryLastHacker; import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.CubeDesc.DeriveInfo; @@ -53,6 +55,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; public abstract class GTCubeStorageQueryBase implements IStorageQuery { @@ -69,7 +72,11 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery { @Override public ITupleIterator search(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) { - // allow custom measures hack + + //cope with queries with no aggregations + RawQueryLastHacker.hackNoAggregations(sqlDigest, cubeDesc); + + // Customized measure taking effect: e.g. allow custom measures to help raw queries notifyBeforeStorageQuery(sqlDigest); Collection<TblColRef> groups = sqlDigest.groupbyColumns; @@ -418,9 +425,21 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery { } private void notifyBeforeStorageQuery(SQLDigest sqlDigest) { + Map<String, List<MeasureDesc>> map = Maps.newHashMap(); for (MeasureDesc measure : cubeDesc.getMeasures()) { MeasureType<?> measureType = measure.getFunction().getMeasureType(); - measureType.adjustSqlDigest(measure, sqlDigest); + + String key = measureType.getClass().getCanonicalName(); + List<MeasureDesc> temp = null; + if ((temp = map.get(key)) != null) { + temp.add(measure); + } else { + map.put(key, Lists.<MeasureDesc> newArrayList(measure)); + } + } + + for (List<MeasureDesc> sublist : map.values()) { + sublist.get(0).getFunction().getMeasureType().adjustSqlDigest(sublist, sqlDigest); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/query/src/main/java/org/apache/kylin/query/enumerator/OLAPEnumerator.java ---------------------------------------------------------------------- diff --git a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPEnumerator.java b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPEnumerator.java index 96e75f3..dafbb5f 100644 --- a/query/src/main/java/org/apache/kylin/query/enumerator/OLAPEnumerator.java +++ b/query/src/main/java/org/apache/kylin/query/enumerator/OLAPEnumerator.java @@ -25,14 +25,8 @@ import org.apache.calcite.DataContext; import org.apache.calcite.jdbc.CalciteConnection; import org.apache.calcite.linq4j.Enumerator; import org.apache.kylin.common.util.DateFormat; -import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.metadata.filter.CompareTupleFilter; import org.apache.kylin.metadata.filter.TupleFilter; -import org.apache.kylin.metadata.model.FunctionDesc; -import org.apache.kylin.metadata.model.MeasureDesc; -import org.apache.kylin.metadata.model.ParameterDesc; -import org.apache.kylin.metadata.model.TblColRef; -import org.apache.kylin.metadata.realization.IRealization; import org.apache.kylin.metadata.realization.SQLDigest; import org.apache.kylin.metadata.tuple.ITuple; import org.apache.kylin.metadata.tuple.ITupleIterator; @@ -114,7 +108,6 @@ public class OLAPEnumerator implements Enumerator<Object[]> { // cube don't have correct result for simple query without group by, but let's try to return something makes sense olapContext.resetSQLDigest(); SQLDigest sqlDigest = olapContext.getSQLDigest(); - hackNoGroupByAggregation(sqlDigest); // query storage engine IStorageQuery storageEngine = StorageFactory.createQuery(olapContext.realization); @@ -161,60 +154,4 @@ public class OLAPEnumerator implements Enumerator<Object[]> { olapContext.storageContext.setThreshold(threshold); } - // Hack no-group-by query for better results - private void hackNoGroupByAggregation(SQLDigest sqlDigest) { - if (!(olapContext.realization instanceof CubeInstance)) { - //the hack only makes sense for cubes - return; - } - - if (!sqlDigest.isRawQuery()) { - return; - } - - // If no group by and metric found, then it's simple query like select ... from ... where ..., - // But we have no raw data stored, in order to return better results, we hack to output sum of metric column - logger.info("No group by and aggregation found in this query, will hack some result for better look of output..."); - - // If it's select * from ..., - // We need to retrieve cube to manually add columns into sqlDigest, so that we have full-columns results as output. - IRealization cube = olapContext.realization; - boolean isSelectAll = sqlDigest.allColumns.isEmpty() || sqlDigest.allColumns.equals(sqlDigest.filterColumns); - for (TblColRef col : cube.getAllColumns()) { - if (col.getTable().equals(sqlDigest.factTable) && (cube.getAllDimensions().contains(col) || isSelectAll)) { - sqlDigest.allColumns.add(col); - } - } - - for (TblColRef col : sqlDigest.allColumns) { - if (cube.getAllDimensions().contains(col)) { - // For dimension columns, take them as group by columns. - sqlDigest.groupbyColumns.add(col); - } else { - // For measure columns, take them as metric columns with aggregation function SUM(). - ParameterDesc colParameter = new ParameterDesc(); - colParameter.setType("column"); - colParameter.setValue(col.getName()); - FunctionDesc sumFunc = new FunctionDesc(); - sumFunc.setExpression("SUM"); - sumFunc.setParameter(colParameter); - - boolean measureHasSum = false; - for (MeasureDesc colMeasureDesc : cube.getMeasures()) { - if (colMeasureDesc.getFunction().equals(sumFunc)) { - measureHasSum = true; - break; - } - } - if (measureHasSum) { - sqlDigest.aggregations.add(sumFunc); - } else { - logger.warn("SUM is not defined for measure column " + col + ", output will be meaningless."); - } - - sqlDigest.metricColumns.add(col); - } - } - } - } http://git-wip-us.apache.org/repos/asf/kylin/blob/ae9d7479/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java index 46f16fe..ff729f4 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v1/CubeStorageQuery.java @@ -42,13 +42,14 @@ import org.apache.kylin.common.util.ShardingHash; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; +import org.apache.kylin.cube.RawQueryLastHacker; import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.kv.RowConstants; import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.cube.model.CubeDesc.DeriveInfo; import org.apache.kylin.cube.model.HBaseColumnDesc; import org.apache.kylin.cube.model.HBaseMappingDesc; import org.apache.kylin.cube.model.RowKeyDesc; +import org.apache.kylin.cube.model.CubeDesc.DeriveInfo; import org.apache.kylin.dict.lookup.LookupStringTable; import org.apache.kylin.measure.MeasureType; import org.apache.kylin.metadata.filter.ColumnTupleFilter; @@ -97,7 +98,10 @@ public class CubeStorageQuery implements IStorageQuery { @Override public ITupleIterator search(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) { - // allow custom measures hack + //cope with queries with no aggregations + RawQueryLastHacker.hackNoAggregations(sqlDigest, cubeDesc); + + // Customized measure taking effect: e.g. allow custom measures to help raw queries notifyBeforeStorageQuery(sqlDigest); Collection<TblColRef> groups = sqlDigest.groupbyColumns; @@ -762,9 +766,22 @@ public class CubeStorageQuery implements IStorageQuery { } private void notifyBeforeStorageQuery(SQLDigest sqlDigest) { + + Map<String, List<MeasureDesc>> map = Maps.newHashMap(); for (MeasureDesc measure : cubeDesc.getMeasures()) { MeasureType<?> measureType = measure.getFunction().getMeasureType(); - measureType.adjustSqlDigest(measure, sqlDigest); + + String key = measureType.getClass().getCanonicalName(); + List<MeasureDesc> temp = null; + if ((temp = map.get(key)) != null) { + temp.add(measure); + } else { + map.put(key, Lists.<MeasureDesc> newArrayList(measure)); + } + } + + for (List<MeasureDesc> sublist : map.values()) { + sublist.get(0).getFunction().getMeasureType().adjustSqlDigest(sublist, sqlDigest); } }