Merge commit 'b3b7a4e7e04831924ff1f6d844956ca5df9dcab1'
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/b9e1108e Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/b9e1108e Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/b9e1108e Branch: refs/heads/master Commit: b9e1108e369909845ef917b985b70f8b76bd69f8 Parents: 4c1c0aa b3b7a4e Author: Li Yang <liy...@apache.org> Authored: Sun Sep 10 09:46:53 2017 +0800 Committer: Li Yang <liy...@apache.org> Committed: Sun Sep 10 09:46:53 2017 +0800 ---------------------------------------------------------------------- .travis.yml | 1 - build/bin/find-hadoop-conf-dir.sh | 106 +++--- build/bin/sample.sh | 3 +- .../org/apache/kylin/common/KylinConfig.java | 20 -- .../apache/kylin/common/KylinConfigBase.java | 4 + .../kylin/common/restclient/RestClient.java | 144 +++++--- .../org/apache/kylin/common/util/JsonUtil.java | 4 + .../main/resources/kylin-defaults.properties | 2 + .../org/apache/kylin/cube/CubeDescManager.java | 3 - .../org/apache/kylin/cube/CubeInstance.java | 117 ++++++- .../java/org/apache/kylin/cube/CubeManager.java | 51 ++- .../java/org/apache/kylin/cube/CubeSegment.java | 5 + .../kylin/cube/cli/DictionaryGeneratorCLI.java | 39 +-- .../kylin/cube/common/RowKeySplitter.java | 4 +- .../org/apache/kylin/cube/cuboid/Cuboid.java | 181 +++------- .../org/apache/kylin/cube/cuboid/CuboidCLI.java | 20 +- .../kylin/cube/cuboid/CuboidModeEnum.java | 48 +++ .../kylin/cube/cuboid/CuboidScheduler.java | 32 +- .../cube/cuboid/DefaultCuboidScheduler.java | 80 ++++- .../kylin/cube/cuboid/TreeCuboidScheduler.java | 329 +++++++++++++++++++ .../inmemcubing/AbstractInMemCubeBuilder.java | 12 +- .../cube/inmemcubing/DoggedCubeBuilder.java | 9 +- .../cube/inmemcubing/InMemCubeBuilder.java | 11 +- .../org/apache/kylin/cube/kv/RowKeyDecoder.java | 4 +- .../org/apache/kylin/cube/kv/RowKeyEncoder.java | 11 +- .../kylin/cube/model/AggregationGroup.java | 84 ++++- .../org/apache/kylin/cube/model/CubeDesc.java | 30 +- .../cube/model/CubeJoinedFlatTableEnrich.java | 3 +- .../org/apache/kylin/cube/util/CubingUtils.java | 2 +- .../kylin/cube/cuboid/CuboidSchedulerTest.java | 26 +- .../apache/kylin/cube/cuboid/CuboidTest.java | 58 ++-- .../cube/cuboid/TreeCuboidSchedulerTest.java | 148 +++++++++ .../apache/kylin/cube/kv/RowKeyDecoderTest.java | 2 +- .../apache/kylin/cube/kv/RowKeyEncoderTest.java | 6 +- .../apache/kylin/dict/DictionaryManager.java | 56 +--- .../kylin/dict/DictionaryManagerTest.java | 35 +- .../impl/threadpool/DistributedScheduler.java | 2 +- .../gtrecord/GTCubeStorageQueryBase.java | 6 +- .../kylin/storage/translate/HBaseKeyRange.java | 2 +- .../kylin/engine/mr/BatchCubingJobBuilder.java | 2 +- .../kylin/engine/mr/BatchCubingJobBuilder2.java | 2 +- .../engine/mr/common/BaseCuboidBuilder.java | 14 +- .../kylin/engine/mr/common/CubeStatsReader.java | 7 +- .../engine/mr/steps/FactDistinctColumnsJob.java | 4 +- .../mr/steps/FactDistinctColumnsMapper.java | 8 +- .../mr/steps/FactDistinctColumnsMapperBase.java | 12 +- .../mr/steps/FactDistinctColumnsReducer.java | 4 +- .../engine/mr/steps/InMemCuboidMapper.java | 3 +- .../kylin/engine/mr/steps/KVGTRecordWriter.java | 2 +- .../engine/mr/steps/MergeCuboidMapper.java | 22 +- .../engine/mr/steps/MergeDictionaryStep.java | 27 +- .../kylin/engine/mr/steps/NDCuboidMapper.java | 6 +- .../apache/kylin/engine/spark/SparkCubing.java | 9 +- .../kylin/engine/spark/SparkCubingByLayer.java | 13 +- .../spark/cube/DefaultTupleConverter.java | 2 +- .../kylin/cube/ITDictionaryManagerTest.java | 6 +- .../ITDoggedCubeBuilderStressTest.java | 2 +- .../inmemcubing/ITDoggedCubeBuilderTest.java | 4 +- .../inmemcubing/ITInMemCubeBuilderTest.java | 2 +- .../kylin/rest/job/StorageCleanupJob.java | 6 +- .../storage/hbase/steps/HBaseCuboidWriter.java | 4 +- .../hbase/util/ZookeeperDistributedLock.java | 2 +- .../cube/MeasureTypeOnlyAggrInBaseTest.java | 4 +- .../org/apache/kylin/tool/CubeMigrationCLI.java | 2 +- 64 files changed, 1246 insertions(+), 623 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-common/src/main/java/org/apache/kylin/common/KylinConfig.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-common/src/main/resources/kylin-defaults.properties ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java index 9f3336c,d5d6ac9..7d539c6 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java @@@ -99,7 -118,26 +118,26 @@@ public class CubeInstance extends RootP public CubeInstance() { } + public CuboidScheduler getCuboidScheduler() { + if (cuboidScheduler != null) + return cuboidScheduler; + + synchronized (this) { + if (cuboidScheduler == null) { + Map<Long, Long> cuboidsWithRowCnt = getCuboids(); + if (cuboidsWithRowCnt == null) { + cuboidScheduler = getDescriptor().getInitialCuboidScheduler(); + } else { + cuboidScheduler = new TreeCuboidScheduler(getDescriptor(), + Lists.newArrayList(cuboidsWithRowCnt.keySet()), + new TreeCuboidScheduler.CuboidCostComparator(cuboidsWithRowCnt)); + } + } + } + return cuboidScheduler; + } + - public List<CubeSegment> getBuildingSegments() { + public Segments<CubeSegment> getBuildingSegments() { return segments.getBuildingSegments(); } http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java index f6de446,eb330ae..5fd3d1b mode 100644,100755..100755 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java index b71608c,f314baf..74eb853 --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/Cuboid.java @@@ -224,8 -151,8 +151,8 @@@ public class Cuboid implements Comparab private volatile CuboidToGridTableMapping cuboidToGridTableMapping = null; -- // will translate the cuboidID if it is not valid -- private Cuboid(CubeDesc cubeDesc, long originalID, long validID) { ++ /** Should be more private. For test only. */ ++ public Cuboid(CubeDesc cubeDesc, long originalID, long validID) { this.cubeDesc = cubeDesc; this.inputID = originalID; this.id = validID; http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java index e802230,5de7e9e..2f25b9c --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidScheduler.java @@@ -61,16 -60,27 +61,29 @@@ abstract public class CuboidScheduler /** Returns the child cuboids of a parent. */ abstract public List<Long> getSpanningCuboid(long parentCuboid); - /** Returns a cuboid on the tree that best matches the request cuboid. */ + /** Returns a valid cuboid that best matches the request cuboid. */ abstract public long findBestMatchCuboid(long requestCuboid); - - /** (AggGroupScheduler) Calculate the cuboid set defined by an aggregation group. */ + - /** Checks whether a cuboid is valid or not. */ - abstract public boolean isValid(long requestCuboid); - - /** Returns the key for what this cuboid scheduler responsible for. */ - abstract public String getCuboidCacheKey(); ++ /** optional */ + abstract public Set<Long> calculateCuboidsForAggGroup(AggregationGroup agg); // ============================================================================ private transient List<List<Long>> cuboidsByLayer; + public long getBaseCuboidId() { + return Cuboid.getBaseCuboidId(cubeDesc); + } + + public CubeDesc getCubeDesc() { + return cubeDesc; + } + ++ /** Checks whether a cuboid is valid or not. */ ++ public boolean isValid(long requestCuboid) { ++ return getAllCuboidIds().contains(requestCuboid); ++ } ++ /** * Get cuboids by layer. It's built from pre-expanding tree. * @return layered cuboids @@@ -105,4 -115,11 +118,17 @@@ return cuboidsByLayer; } + /** + * Get cuboid level count except base cuboid + * @return + */ + public int getBuildLevel() { + return getCuboidsByLayer().size() - 1; + } ++ ++ /** Returns the key for what this cuboid scheduler responsible for. */ ++ public String getCuboidCacheKey() { ++ return CubeDesc.class.getSimpleName() + "-" + cubeDesc.getName(); ++ } ++ } http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/cuboid/DefaultCuboidScheduler.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/cube/cuboid/DefaultCuboidScheduler.java index f7d22da,9762cdc..727f61b --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/DefaultCuboidScheduler.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/DefaultCuboidScheduler.java @@@ -83,14 -87,48 +83,44 @@@ public class DefaultCuboidScheduler ext return Sets.newHashSet(allCuboidIds); } - /** - * Get the parent cuboid really on the spanning tree. - * @param child an on-tree cuboid - * @return - */ @Override - public long findBestMatchCuboid(long child) { - long parent = getOnTreeParent(child); + public boolean isValid(long requestCuboid) { + return allCuboidIds.contains(requestCuboid); + } + - /** - * Get the parent cuboid rely on the spanning tree. - * @param cuboid an on-tree cuboid - * @return - */ ++ /** Returns a valid cuboid that best matches the request cuboid. */ + @Override + public long findBestMatchCuboid(long cuboid) { + return findBestMatchCuboid1(cuboid); + } + + long findBestMatchCuboid1(long cuboid) { + if (isValid(cuboid)) { + return cuboid; + } + + List<Long> onTreeCandidates = Lists.newArrayList(); + for (AggregationGroup agg : cubeDesc.getAggregationGroups()) { - Long candidate = translateToOnTreeCuboid(agg, cuboid); ++ Long candidate = agg.translateToOnTreeCuboid(cuboid); + if (candidate != null) { + onTreeCandidates.add(candidate); + } + } + + if (onTreeCandidates.size() == 0) { + return getBaseCuboidId(); + } + + long onTreeCandi = Collections.min(onTreeCandidates, Cuboid.cuboidSelectComparator); + if (isValid(onTreeCandi)) { + return onTreeCandi; + } + + return doFindBestMatchCuboid1(onTreeCandi); + } + + private long doFindBestMatchCuboid1(long cuboid) { + long parent = getOnTreeParent(cuboid); while (parent > 0) { if (cubeDesc.getAllCuboids().contains(parent)) { break; @@@ -352,4 -466,40 +381,37 @@@ return Long.bitCount(cuboidID) <= dimCap; } + long findBestMatchCuboid2(long cuboid) { + long bestParent = doFindBestMatchCuboid2(cuboid, Cuboid.getBaseCuboidId(cubeDesc)); + if (bestParent < -1) { + throw new IllegalStateException("Cannot find the parent of the cuboid:" + cuboid); + } + return bestParent; + } + + private long doFindBestMatchCuboid2(long cuboid, long parent) { + if (!canDerive(cuboid, parent)) { + return -1; + } + List<Long> children = parent2child.get(parent); + List<Long> candidates = Lists.newArrayList(); + if (children != null) { + for (long child : children) { + long candidate = doFindBestMatchCuboid2(cuboid, child); + if (candidate > 0) { + candidates.add(candidate); + } + } + } + if (candidates.isEmpty()) { + candidates.add(parent); + } + + return Collections.min(candidates, Cuboid.cuboidSelectComparator); + } + + private boolean canDerive(long cuboidId, long parentCuboid) { + return (cuboidId & ~parentCuboid) == 0; + } + - public String getCuboidCacheKey() { - return CubeDesc.class.getName() + "-" + cubeDesc.getName(); - } } http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java index 0000000,e0e82aa..8e3b4dc mode 000000,100644..100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java @@@ -1,0 -1,322 +1,329 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.kylin.cube.cuboid; + + import java.io.PrintWriter; + import java.util.ArrayList; + import java.util.Collections; + import java.util.Comparator; + import java.util.HashMap; + import java.util.LinkedList; + import java.util.List; + import java.util.Map; + import java.util.Set; + + import javax.annotation.Nullable; + + import org.apache.kylin.cube.CubeInstance; ++import org.apache.kylin.cube.model.AggregationGroup; + import org.apache.kylin.cube.model.CubeDesc; + + import com.fasterxml.jackson.annotation.JsonIgnore; + import com.fasterxml.jackson.annotation.JsonProperty; + import com.google.common.annotations.VisibleForTesting; + import com.google.common.base.Function; + import com.google.common.base.Preconditions; + import com.google.common.collect.Lists; + + public class TreeCuboidScheduler extends CuboidScheduler { + + final private CuboidTree cuboidTree; + + public TreeCuboidScheduler(CubeDesc cubeDesc, List<Long> allCuboidIds, Comparator<Long> cuboidComparator) { + super(cubeDesc); + cuboidTree = CuboidTree.createFromCuboids(allCuboidIds, cuboidComparator); + } + + @Override + public Set<Long> getAllCuboidIds() { + return cuboidTree.getAllCuboidIds(); + } + + @Override + public int getCuboidCount() { + return cuboidTree.getCuboidCount(Cuboid.getBaseCuboidId(cubeDesc)); + } + + @Override + public List<Long> getSpanningCuboid(long cuboidId) { + return cuboidTree.getSpanningCuboid(cuboidId); + } + + @Override + public long findBestMatchCuboid(long cuboidId) { + return cuboidTree.findBestMatchCuboid(cuboidId); + } + + @Override + public boolean isValid(long requestCuboid) { + return cuboidTree.isValid(requestCuboid); + } + + public static class CuboidTree { + private int treeLevels; + + private TreeNode root; + + private Comparator<Long> cuboidComparator; + + private Map<Long, TreeNode> index = new HashMap<>(); + + @VisibleForTesting + static CuboidTree createFromCuboids(List<Long> allCuboidIds) { + return createFromCuboids(allCuboidIds, Cuboid.cuboidSelectComparator); + } + + @VisibleForTesting + static CuboidTree createFromCuboids(List<Long> allCuboidIds, Comparator<Long> cuboidComparator) { + // sort the cuboid ids in descending order, so that don't need to adjust + // the cuboid tree when adding cuboid id to the tree. + Collections.sort(allCuboidIds, new Comparator<Long>() { + @Override + public int compare(Long o1, Long o2) { + return Long.compare(o2, o1); + } + }); + long basicCuboidId = allCuboidIds.get(0); + CuboidTree cuboidTree = new CuboidTree(cuboidComparator); + cuboidTree.setRoot(basicCuboidId); + + for (long cuboidId : allCuboidIds) { + cuboidTree.addCuboid(cuboidId); + } + cuboidTree.buildIndex(); + return cuboidTree; + } + + private CuboidTree(Comparator<Long> cuboidComparator) { + this.cuboidComparator = cuboidComparator; + } + + public Set<Long> getAllCuboidIds() { + return index.keySet(); + } + + public List<Long> getSpanningCuboid(long cuboidId) { + TreeNode node = index.get(cuboidId); + if (node == null) { + throw new IllegalArgumentException("the cuboid:" + cuboidId + " is not exist in the tree"); + } + + return Lists.transform(node.children, new Function<TreeNode, Long>() { + @Nullable + @Override + public Long apply(@Nullable TreeNode input) { + return input.cuboidId; + } + }); + } + + public long findBestMatchCuboid(long cuboidId) { + // exactly match + if (isValid(cuboidId)) { + return cuboidId; + } + + return findBestParent(cuboidId).cuboidId; + } + + public boolean isValid(long cuboidId) { + return index.containsKey(cuboidId); + } + + private int getCuboidCount(long cuboidId) { + int r = 1; + for (Long child : getSpanningCuboid(cuboidId)) { + r += getCuboidCount(child); + } + return r; + } + + public void print(PrintWriter out) { + int dimensionCnt = Long.bitCount(root.cuboidId); + doPrint(root, dimensionCnt, 0, out); + } + + private void doPrint(TreeNode node, int dimensionCount, int depth, PrintWriter out) { + printCuboid(node.cuboidId, dimensionCount, depth, out); + + for (TreeNode child : node.children) { + doPrint(child, dimensionCount, depth + 1, out); + } + } + + private void printCuboid(long cuboidID, int dimensionCount, int depth, PrintWriter out) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < depth; i++) { + sb.append(" "); + } + String cuboidName = Cuboid.getDisplayName(cuboidID, dimensionCount); + sb.append("|---- Cuboid ").append(cuboidName).append("(" + cuboidID + ")"); + out.println(sb.toString()); + } + + private void setRoot(long basicCuboidId) { + this.root = new TreeNode(basicCuboidId, 0); + this.treeLevels = 0; + } + + private void buildIndex() { + LinkedList<TreeNode> queue = new LinkedList<>(); + queue.add(root); + while (!queue.isEmpty()) { + TreeNode node = queue.removeFirst(); + index.put(node.cuboidId, node); + for (TreeNode child : node.children) { + queue.add(child); + } + } + } + + private void addCuboid(long cuboidId) { + TreeNode parent = findBestParent(cuboidId); + if (parent != null && parent.cuboidId != cuboidId) { + parent.addChild(cuboidId, parent.level); + this.treeLevels = Math.max(this.treeLevels, parent.level + 1); + } + } + + private TreeNode findBestParent(long cuboidId) { + TreeNode bestParent = doFindBestParent(cuboidId, root); + if (bestParent == null) { + throw new IllegalStateException("Cannot find the parent of the cuboid:" + cuboidId); + } + return bestParent; + } + + private TreeNode doFindBestParent(long cuboidId, TreeNode parentCuboid) { + if (!canDerive(cuboidId, parentCuboid.cuboidId)) { + return null; + } + + List<TreeNode> candidates = Lists.newArrayList(); + for (TreeNode childCuboid : parentCuboid.children) { + TreeNode candidate = doFindBestParent(cuboidId, childCuboid); + if (candidate != null) { + candidates.add(candidate); + } + } + if (candidates.isEmpty()) { + candidates.add(parentCuboid); + } + + return Collections.min(candidates, new Comparator<TreeNode>() { + @Override + public int compare(TreeNode o1, TreeNode o2) { + return cuboidComparator.compare(o1.cuboidId, o2.cuboidId); + } + }); + } + + private boolean canDerive(long cuboidId, long parentCuboid) { + return (cuboidId & ~parentCuboid) == 0; + } + } + + public static class TreeNode { + @JsonProperty("cuboid_id") + long cuboidId; + @JsonIgnore + int level; + @JsonProperty("children") + List<TreeNode> children = new ArrayList<>(); + + public long getCuboidId() { + return cuboidId; + } + + public int getLevel() { + return level; + } + + public List<TreeNode> getChildren() { + return children; + } + + TreeNode(long cuboidId, int level) { + this.cuboidId = cuboidId; + this.level = level; + } + + void addChild(long childId, int parentlevel) { + this.children.add(new TreeNode(childId, parentlevel + 1)); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (int) (cuboidId ^ (cuboidId >>> 32)); + result = prime * result + level; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + TreeNode other = (TreeNode) obj; + if (cuboidId != other.cuboidId) + return false; + if (level != other.level) + return false; + return true; + } + } + + /** + * Compare cuboid according to the cuboid data row count + */ + public static class CuboidCostComparator implements Comparator<Long> { + private Map<Long, Long> cuboidStatistics; + + public CuboidCostComparator(Map<Long, Long> cuboidStatistics) { + Preconditions.checkArgument(cuboidStatistics != null, + "the input " + cuboidStatistics + " should not be null!!!"); + this.cuboidStatistics = cuboidStatistics; + } + + @Override + public int compare(Long cuboid1, Long cuboid2) { + Long rowCnt1 = cuboidStatistics.get(cuboid1); + Long rowCnt2 = cuboidStatistics.get(cuboid2); + if (rowCnt2 == null || rowCnt1 == null) { + return Cuboid.cuboidSelectComparator.compare(cuboid1, cuboid2); + } + return Long.compare(rowCnt1, rowCnt2); + } + } + ++ @Override + public String getCuboidCacheKey() { - return CubeInstance.class.getName() + "-" + cubeDesc.getName(); ++ return CubeInstance.class.getSimpleName() + "-" + cubeDesc.getName(); ++ } ++ ++ @Override ++ public Set<Long> calculateCuboidsForAggGroup(AggregationGroup agg) { ++ throw new UnsupportedOperationException(); + } + } http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/model/AggregationGroup.java ---------------------------------------------------------------------- diff --cc core-cube/src/main/java/org/apache/kylin/cube/model/AggregationGroup.java index af026af,c1dbc1e..82b32a9 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/AggregationGroup.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/AggregationGroup.java @@@ -21,6 -21,6 +21,8 @@@ package org.apache.kylin.cube.model import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; ++import java.util.Collection; ++import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@@ -33,7 -34,7 +36,9 @@@ import org.apache.kylin.metadata.model. import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonProperty; ++import com.google.common.base.Function; import com.google.common.base.Preconditions; ++import com.google.common.collect.Collections2; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@@ -328,6 -330,6 +334,82 @@@ public class AggregationGroup implement return combination; } ++ public Long translateToOnTreeCuboid(long cuboidID) { ++ if ((cuboidID & ~getPartialCubeFullMask()) > 0) { ++ //the partial cube might not contain all required dims ++ return null; ++ } ++ ++ // add mandantory ++ cuboidID = cuboidID | getMandatoryColumnMask(); ++ ++ // add hierarchy ++ for (HierarchyMask hierarchyMask : getHierarchyMasks()) { ++ long fullMask = hierarchyMask.fullMask; ++ long intersect = cuboidID & fullMask; ++ if (intersect != 0 && intersect != fullMask) { ++ ++ boolean startToFill = false; ++ for (int i = hierarchyMask.dims.length - 1; i >= 0; i--) { ++ if (startToFill) { ++ cuboidID |= hierarchyMask.dims[i]; ++ } else { ++ if ((cuboidID & hierarchyMask.dims[i]) != 0) { ++ startToFill = true; ++ cuboidID |= hierarchyMask.dims[i]; ++ } ++ } ++ } ++ } ++ } ++ ++ // add joint dims ++ for (Long joint : getJoints()) { ++ if (((cuboidID | joint) != cuboidID) && ((cuboidID & ~joint) != cuboidID)) { ++ cuboidID = cuboidID | joint; ++ } ++ } ++ ++ if (!isOnTree(cuboidID)) { ++ // no column, add one column ++ long nonJointDims = removeBits((getPartialCubeFullMask() ^ getMandatoryColumnMask()), getJoints()); ++ if (nonJointDims != 0) { ++ long nonJointNonHierarchy = removeBits(nonJointDims, ++ Collections2.transform(getHierarchyMasks(), new Function<HierarchyMask, Long>() { ++ @Override ++ public Long apply(HierarchyMask input) { ++ return input.fullMask; ++ } ++ })); ++ if (nonJointNonHierarchy != 0) { ++ //there exists dim that does not belong to any joint or any hierarchy, that's perfect ++ return cuboidID | Long.lowestOneBit(nonJointNonHierarchy); ++ } else { ++ //choose from a hierarchy that does not intersect with any joint dim, only check level 1 ++ long allJointDims = getJointDimsMask(); ++ for (HierarchyMask hierarchyMask : getHierarchyMasks()) { ++ long dim = hierarchyMask.allMasks[0]; ++ if ((dim & allJointDims) == 0) { ++ return cuboidID | dim; ++ } ++ } ++ } ++ } ++ ++ cuboidID = cuboidID | Collections.min(getJoints(), Cuboid.cuboidSelectComparator); ++ Preconditions.checkState(isOnTree(cuboidID)); ++ } ++ return cuboidID; ++ } ++ ++ private long removeBits(long original, Collection<Long> toRemove) { ++ long ret = original; ++ for (Long joint : toRemove) { ++ ret = ret & ~joint; ++ } ++ return ret; ++ } ++ public boolean isOnTree(long cuboidID) { if (cuboidID <= 0) { return false; //cuboid must be greater than 0 http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --cc core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index 1628f4e,7a253e9..135ab0c mode 100644,100755..100755 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java ---------------------------------------------------------------------- diff --cc engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java index 6e1a5dc,bcd6df0..82bfbd6 mode 100644,100755..100755 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapperBase.java ---------------------------------------------------------------------- diff --cc engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapperBase.java index 9952def,91454da..00b831a mode 100644,100755..100755 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapperBase.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapperBase.java http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java ---------------------------------------------------------------------- diff --cc engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java index 20fb9e2,96a7d15..73c8a20 mode 100644,100755..100755 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/InMemCuboidMapper.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapper.java ---------------------------------------------------------------------- diff --cc engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapper.java index c2ef2d2,97f5961..6cd7c28 mode 100644,100755..100755 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapper.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapper.java http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeDictionaryStep.java ---------------------------------------------------------------------- diff --cc engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeDictionaryStep.java index 4ca132c,58b2c02..58b2c02 mode 100644,100755..100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeDictionaryStep.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeDictionaryStep.java http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/NDCuboidMapper.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/b9e1108e/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java ---------------------------------------------------------------------- diff --cc engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java index 94435f5,15f6424..5d5b930 --- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java +++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubingByLayer.java @@@ -181,10 -191,11 +181,10 @@@ public class SparkCubingByLayer extend reducerFunction2 = new CuboidReducerFunction2(cubeName, metaUrl, needAggr); } - final int totalLevels = cubeDesc.getBuildLevel(); + final int totalLevels = cubeSegment.getCuboidScheduler().getBuildLevel(); JavaPairRDD<ByteArray, Object[]>[] allRDDs = new JavaPairRDD[totalLevels + 1]; int level = 0; - long baseRDDSize = SizeEstimator.estimate(encodedBaseRDD) / (1024 * 1024); - int partition = estimateRDDPartitionNum(level, cubeStatsReader, envConfig, (int) baseRDDSize); + int partition = estimateRDDPartitionNum(level, cubeStatsReader, envConfig); // aggregate to calculate base cuboid allRDDs[0] = encodedBaseRDD.reduceByKey(baseCuboidReducerFunction, partition).persist(storageLevel);