Merge commit '782a97482ca1e92cf24e04badd8ee48c9b829f46'
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ee5397bc Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ee5397bc Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ee5397bc Branch: refs/heads/master Commit: ee5397bc6207c9cdf57cdce93bcda6bcac58a5a9 Parents: d4cce76 782a974 Author: Hongbin Ma <mahong...@apache.org> Authored: Fri Mar 31 20:00:16 2017 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Fri Mar 31 20:00:59 2017 +0800 ---------------------------------------------------------------------- build/conf/kylin.properties | 2 +- .../apache/kylin/common/KylinConfigBase.java | 4 + .../kylin/common/util/ImmutableBitSet.java | 29 +- .../org/apache/kylin/GTForwardingScanner.java | 56 ++++ .../java/org/apache/kylin/cube/CubeManager.java | 8 +- .../kylin/cube/gridtable/CubeGridTable.java | 18 -- .../gridtable/CuboidToGridTableMapping.java | 18 ++ .../cube/inmemcubing/InMemCubeBuilder.java | 6 +- .../org/apache/kylin/cube/model/CubeDesc.java | 11 +- .../kylin/gridtable/GTAggregateScanner.java | 16 +- .../apache/kylin/gridtable/GTFilterScanner.java | 22 +- .../org/apache/kylin/gridtable/GTRecord.java | 80 ++--- .../apache/kylin/gridtable/GTScanRequest.java | 13 + .../gridtable/GTStreamAggregateScanner.java | 219 ++++++++++++++ .../kylin/gridtable/GTScanReqSerDerTest.java | 4 +- .../apache/kylin/measure/hllc/HLLCounter.java | 54 ++-- .../apache/kylin/storage/StorageContext.java | 8 + .../storage/gtrecord/CubeScanRangePlanner.java | 3 +- .../storage/gtrecord/CubeSegmentScanner.java | 7 +- .../storage/gtrecord/CubeTupleConverter.java | 26 +- .../gtrecord/GTCubeStorageQueryBase.java | 35 ++- .../kylin/storage/gtrecord/ITupleConverter.java | 3 +- .../gtrecord/PartitionResultIterator.java | 59 ++++ .../kylin/storage/gtrecord/ScannerWorker.java | 5 +- .../gtrecord/SegmentCubeTupleIterator.java | 71 ++++- .../SortMergedPartitionResultIterator.java | 81 +++++ .../gtrecord/StorageResponseGTScatter.java | 83 ++--- .../mr/steps/FactDistinctColumnsMapper.java | 31 +- .../mr/steps/NewCubeSamplingMethodTest.java | 299 +++++++++++++++++++ .../hbase/cube/v2/CubeHBaseEndpointRPC.java | 7 +- .../storage/hbase/cube/v2/CubeHBaseRPC.java | 5 +- .../storage/hbase/cube/v2/CubeHBaseScanRPC.java | 5 +- .../apache/kylin/tool/StorageCleanupJob.java | 13 +- 33 files changed, 1047 insertions(+), 254 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/ee5397bc/build/conf/kylin.properties ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/ee5397bc/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/ee5397bc/core-cube/src/main/java/org/apache/kylin/gridtable/GTRecord.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/gridtable/GTRecord.java index f4480c8,3397adc..3e62ea7 --- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTRecord.java +++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTRecord.java @@@ -118,20 -114,14 +114,24 @@@ public class GTRecord implements Compar return result; } ++ + /** decode and return the values of this record */ + public Object[] getValues(int[] selectedColumns, Object[] result) { + assert selectedColumns.length <= result.length; + for (int i = 0; i < selectedColumns.length; i++) { - int c = selectedColumns[i]; - if (cols[c].array() == null) { - result[i] = null; - } else { - result[i] = info.codeSystem.decodeColumnValue(c, cols[c].asBuffer()); - } ++ result[i] = decodeValue(selectedColumns[i]); + } + return result; + } + + public Object decodeValue(int c) { + ByteArray col = cols[c]; + if (col != null && col.array() != null) { + return info.codeSystem.decodeColumnValue(c, col.asBuffer()); + } + return null; + } + public int sizeOf(ImmutableBitSet selectedCols) { int size = 0; for (int i = 0; i < selectedCols.trueBitCount(); i++) { http://git-wip-us.apache.org/repos/asf/kylin/blob/ee5397bc/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeTupleConverter.java ---------------------------------------------------------------------- diff --cc core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeTupleConverter.java index 526a554,b762e5c..7df80ca --- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeTupleConverter.java +++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/CubeTupleConverter.java @@@ -27,20 -25,14 +27,18 @@@ import java.util.Set import org.apache.kylin.common.util.Array; import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.cuboid.Cuboid; - import org.apache.kylin.cube.gridtable.CuboidToGridTableMapping; import org.apache.kylin.cube.model.CubeDesc.DeriveInfo; import org.apache.kylin.dict.lookup.LookupStringTable; +import org.apache.kylin.dict.lookup.SnapshotManager; +import org.apache.kylin.dict.lookup.SnapshotTable; - import org.apache.kylin.gridtable.GTRecord; import org.apache.kylin.measure.MeasureType; import org.apache.kylin.measure.MeasureType.IAdvMeasureFiller; +import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.JoinDesc; +import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.metadata.tuple.Tuple; import org.apache.kylin.metadata.tuple.TupleInfo; @@@ -51,10 -40,11 +49,13 @@@ import org.slf4j.LoggerFactory import com.google.common.collect.Lists; import com.google.common.collect.Maps; + /** + * Convert Object[] (decoded GTRecord) to tuple + */ public class CubeTupleConverter implements ITupleConverter { + private static final Logger logger = LoggerFactory.getLogger(CubeTupleConverter.class); + final CubeSegment cubeSeg; final Cuboid cuboid; final TupleInfo tupleInfo; @@@ -71,9 -60,11 +71,10 @@@ private final int nSelectedDims; public CubeTupleConverter(CubeSegment cubeSeg, Cuboid cuboid, // - Set<TblColRef> selectedDimensions, Set<FunctionDesc> selectedMetrics, TupleInfo returnTupleInfo) { - Set<TblColRef> selectedDimensions, Set<FunctionDesc> selectedMetrics, int[] gtColIdx, - TupleInfo returnTupleInfo) { ++ Set<TblColRef> selectedDimensions, Set<FunctionDesc> selectedMetrics, int[] gtColIdx, TupleInfo returnTupleInfo) { this.cubeSeg = cubeSeg; this.cuboid = cuboid; + this.gtColIdx = gtColIdx; this.tupleInfo = returnTupleInfo; this.derivedColFillers = Lists.newArrayList(); http://git-wip-us.apache.org/repos/asf/kylin/blob/ee5397bc/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java ---------------------------------------------------------------------- diff --cc core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java index 1b4964f,d91a0b4..065fd6e --- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java +++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java @@@ -72,30 -72,6 +72,28 @@@ public abstract class GTCubeStorageQuer @Override public ITupleIterator search(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) { + GTCubeStorageQueryRequest request = getStorageQueryRequest(context, sqlDigest, returnTupleInfo); + + List<CubeSegmentScanner> scanners = Lists.newArrayList(); + for (CubeSegment cubeSeg : cubeInstance.getSegments(SegmentStatusEnum.READY)) { + CubeSegmentScanner scanner; + + if (cubeDesc.getConfig().isSkippingEmptySegments() && cubeSeg.getInputRecords() == 0) { + logger.info("Skip cube segment {} because its input record is 0", cubeSeg); + continue; + } + + scanner = new CubeSegmentScanner(cubeSeg, request.getCuboid(), request.getDimensions(), request.getGroups(), request.getMetrics(), request.getFilter(), request.getContext()); + scanners.add(scanner); + } + + if (scanners.isEmpty()) + return ITupleIterator.EMPTY_TUPLE_ITERATOR; + + return new SequentialCubeTupleIterator(scanners, request.getCuboid(), request.getDimensions(), request.getMetrics(), returnTupleInfo, request.getContext()); + } + - protected abstract String getGTStorage(); - + protected GTCubeStorageQueryRequest getStorageQueryRequest(StorageContext context, SQLDigest sqlDigest, TupleInfo returnTupleInfo) { context.setStorageQuery(this); //deal with participant columns in subquery join @@@ -148,13 -125,29 +148,15 @@@ // set query deadline context.setDeadline(cubeInstance); - List<CubeSegmentScanner> scanners = Lists.newArrayList(); - for (CubeSegment cubeSeg : cubeInstance.getSegments(SegmentStatusEnum.READY)) { - CubeSegmentScanner scanner; - - if (cubeDesc.getConfig().isSkippingEmptySegments() && cubeSeg.getInputRecords() == 0) { - logger.info("Skip cube segment {} because its input record is 0", cubeSeg); - continue; - } + logger.info("Cuboid identified: cube={}, cuboidId={}, groupsD={}, filterD={}, limitPushdown={}, storageAggr={}", cubeInstance.getName(), cuboid.getId(), groupsD, filterColumnD, context.getFinalPushDownLimit(), context.isNeedStorageAggregation()); - scanner = new CubeSegmentScanner(cubeSeg, cuboid, dimensionsD, groupsD, metrics, filterD, context); - scanners.add(scanner); - } - - if (scanners.isEmpty()) - return ITupleIterator.EMPTY_TUPLE_ITERATOR; - - return new SequentialCubeTupleIterator(scanners, cuboid, dimensionsD, metrics, returnTupleInfo, context); + return new GTCubeStorageQueryRequest(cuboid, dimensionsD, groupsD, metrics, filterD, context); } - protected ITupleConverter newCubeTupleConverter(CubeSegment cubeSeg, Cuboid cuboid, Set<TblColRef> selectedDimensions, Set<FunctionDesc> selectedMetrics, TupleInfo tupleInfo) { - return new CubeTupleConverter(cubeSeg, cuboid, selectedDimensions, selectedMetrics, tupleInfo); + protected abstract String getGTStorage(); + + protected ITupleConverter newCubeTupleConverter(CubeSegment cubeSeg, Cuboid cuboid, Set<TblColRef> selectedDimensions, Set<FunctionDesc> selectedMetrics, int[] gtColIdx, TupleInfo tupleInfo) { + return new CubeTupleConverter(cubeSeg, cuboid, selectedDimensions, selectedMetrics, gtColIdx, tupleInfo); } protected void buildDimensionsAndMetrics(SQLDigest sqlDigest, Collection<TblColRef> dimensions, Collection<FunctionDesc> metrics) {