KYLIN-2826 add basic support classes for cube planner algorithms Signed-off-by: Li Yang <liy...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e2029dd9 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e2029dd9 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e2029dd9 Branch: refs/heads/master Commit: e2029dd9a6de405d5ab32f05f0a92e9f228abef3 Parents: 2edee49 Author: Zhong <nju_y...@apache.org> Authored: Tue Aug 22 14:57:29 2017 +0800 Committer: Li Yang <liy...@apache.org> Committed: Sat Sep 23 18:16:48 2017 +0800 ---------------------------------------------------------------------- .../kylin/cube/cuboid/TreeCuboidScheduler.java | 3 +- .../algorithm/AbstractRecommendAlgorithm.java | 82 + .../cube/cuboid/algorithm/BPUSCalculator.java | 165 + .../cube/cuboid/algorithm/BenefitPolicy.java | 41 + .../cuboid/algorithm/CuboidBenefitModel.java | 210 + .../algorithm/CuboidRecommendAlgorithm.java | 49 + .../cube/cuboid/algorithm/CuboidStats.java | 272 ++ .../cube/cuboid/algorithm/CuboidStatsUtil.java | 166 + .../cube/cuboid/algorithm/PBPUSCalculator.java | 53 + .../cube/cuboid/algorithm/SPBPUSCalculator.java | 41 + .../cuboid/algorithm/AlgorithmTestBase.java | 310 ++ .../cuboid/algorithm/CuboidStatsUtilTest.java | 161 + core-cube/src/test/resources/statistics.txt | 4092 ++++++++++++++++++ 13 files changed, 5643 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java index 8e3b4dc..903e358 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/TreeCuboidScheduler.java @@ -89,8 +89,7 @@ public class TreeCuboidScheduler extends CuboidScheduler { return createFromCuboids(allCuboidIds, Cuboid.cuboidSelectComparator); } - @VisibleForTesting - static CuboidTree createFromCuboids(List<Long> allCuboidIds, Comparator<Long> cuboidComparator) { + public static CuboidTree createFromCuboids(List<Long> allCuboidIds, Comparator<Long> cuboidComparator) { // sort the cuboid ids in descending order, so that don't need to adjust // the cuboid tree when adding cuboid id to the tree. Collections.sort(allCuboidIds, new Comparator<Long>() { http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java new file mode 100755 index 0000000..b35c738 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/AbstractRecommendAlgorithm.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.util.concurrent.atomic.AtomicBoolean; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractRecommendAlgorithm implements CuboidRecommendAlgorithm { + private static final Logger logger = LoggerFactory.getLogger(AbstractRecommendAlgorithm.class); + + private CuboidStats cuboidStats; + private BenefitPolicy benefitPolicy; + + private AtomicBoolean cancelRequested = new AtomicBoolean(false); + private AtomicBoolean canceled = new AtomicBoolean(false); + + private long timeoutMillis; + + public AbstractRecommendAlgorithm(final long timeout, BenefitPolicy benefitPolicy, CuboidStats cuboidStats) { + if (timeout <= 0) { + this.timeoutMillis = Long.MAX_VALUE; + } else { + this.timeoutMillis = timeout; + } + this.cuboidStats = cuboidStats; + this.benefitPolicy = benefitPolicy; + } + + @Override + public void cancel() { + cancelRequested.set(true); + } + + /** + * Checks whether the algorithm has been canceled or timed out. + * + */ + protected boolean shouldCancel() { + if (canceled.get()) { + return true; + } + if (cancelRequested.get()) { + canceled.set(true); + cancelRequested.set(false); + logger.warn("Algorithm is canceled."); + return true; + } + final long currentTimeMillis = System.currentTimeMillis(); + if (currentTimeMillis > timeoutMillis) { + canceled.set(true); + logger.warn("Algorithm exceeds time limit."); + return true; + } + return false; + } + + public CuboidStats getCuboidStats() { + return cuboidStats; + } + + public BenefitPolicy getBenefitPolicy() { + return benefitPolicy; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java new file mode 100755 index 0000000..6d0b654 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BPUSCalculator.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +/** + * Calculate the benefit based on Benefit Per Unit Space. + */ +public class BPUSCalculator implements BenefitPolicy { + + private static Logger logger = LoggerFactory.getLogger(BPUSCalculator.class); + + protected CuboidStats cuboidStats; + protected Map<Long, Long> cuboidAggCostMap; + + public BPUSCalculator(CuboidStats cuboidStats) { + this.cuboidStats = cuboidStats; + this.cuboidAggCostMap = Maps.newHashMap(); + } + + @Override + public void initBeforeStart() { + cuboidAggCostMap.clear(); + //Initialize stats for mandatory cuboids + for (Long cuboid : cuboidStats.getAllCuboidsForMandatory()) { + if (getCuboidCost(cuboid) != null) { + cuboidAggCostMap.put(cuboid, getCuboidCost(cuboid)); + } + } + Set<Long> mandatoryCuboidSetWithStats = cuboidAggCostMap.keySet(); + //Initialize stats for selection cuboids + long baseCuboidCost = getCuboidCost(cuboidStats.getBaseCuboid()); + for (Long cuboid : cuboidStats.getAllCuboidsForSelection()) { + long leastCost = baseCuboidCost; + for (Long cuboidTarget : mandatoryCuboidSetWithStats) { + if ((cuboid | cuboidTarget) == cuboidTarget) { + if (leastCost > cuboidAggCostMap.get(cuboidTarget)) { + leastCost = cuboidAggCostMap.get(cuboidTarget); + } + } + } + cuboidAggCostMap.put(cuboid, leastCost); + } + } + + @Override + public CuboidBenefitModel.BenefitModel calculateBenefit(long cuboid, Set<Long> selected) { + double totalCostSaving = 0; + int benefitCount = 0; + for (Long descendant : cuboidStats.getAllDescendants(cuboid)) { + if (!selected.contains(descendant)) { + double costSaving = getCostSaving(descendant, cuboid); + if (costSaving > 0) { + totalCostSaving += costSaving; + benefitCount++; + } + } + } + + double spaceCost = calculateSpaceCost(cuboid); + double benefitPerUnitSpace = totalCostSaving / spaceCost; + return new CuboidBenefitModel.BenefitModel(benefitPerUnitSpace, benefitCount); + } + + @Override + public CuboidBenefitModel.BenefitModel calculateBenefitTotal(List<Long> cuboidsToAdd, Set<Long> selected) { + Set<Long> selectedInner = Sets.newHashSet(selected); + Map<Long, Long> cuboidAggCostMapSnapshot = Maps.newHashMap(cuboidAggCostMap); + for (Long cuboid : cuboidsToAdd) { + selectedInner.add(cuboid); + propagateAggregationCost(cuboid, selectedInner); + } + double totalCostSaving = 0; + int benefitCount = 0; + for (Long cuboid : cuboidAggCostMap.keySet()) { + if (cuboidAggCostMap.get(cuboid) < cuboidAggCostMapSnapshot.get(cuboid)) { + totalCostSaving += cuboidAggCostMapSnapshot.get(cuboid) - cuboidAggCostMap.get(cuboid); + benefitCount++; + } + } + cuboidAggCostMap = cuboidAggCostMapSnapshot; + + double benefitPerUnitSpace = totalCostSaving; + return new CuboidBenefitModel.BenefitModel(benefitPerUnitSpace, benefitCount); + } + + protected double getCostSaving(long descendant, long cuboid) { + long cuboidCost = getCuboidCost(cuboid); + long descendantAggCost = getCuboidAggregationCost(descendant); + return descendantAggCost - cuboidCost; + } + + protected Long getCuboidCost(long cuboid) { + return cuboidStats.getCuboidCount(cuboid); + } + + private long getCuboidAggregationCost(long cuboid) { + return cuboidAggCostMap.get(cuboid); + } + + @Override + public boolean ifEfficient(CuboidBenefitModel best) { + if (best.getBenefit() < getMinBenefitRatio()) { + logger.info(String.format("The recommended cuboid %s doesn't meet minimum benifit ratio %f", best, + getMinBenefitRatio())); + return false; + } + return true; + } + + public double getMinBenefitRatio() { + return 0.01; + } + + @Override + public void propagateAggregationCost(long cuboid, Set<Long> selected) { + long aggregationCost = getCuboidCost(cuboid); + Set<Long> childrenCuboids = cuboidStats.getAllDescendants(cuboid); + for (Long child : childrenCuboids) { + if (!selected.contains(child) && (aggregationCost < getCuboidAggregationCost(child))) { + cuboidAggCostMap.put(child, aggregationCost); + } + } + } + + /** + * Return the space cost of building a cuboid. + * + */ + public double calculateSpaceCost(long cuboid) { + return cuboidStats.getCuboidCount(cuboid); + } + + @Override + public BenefitPolicy getInstance() { + BPUSCalculator bpusCalculator = new BPUSCalculator(this.cuboidStats); + bpusCalculator.cuboidAggCostMap.putAll(this.cuboidAggCostMap); + return bpusCalculator; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java new file mode 100755 index 0000000..43bd3af --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/BenefitPolicy.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.util.List; +import java.util.Set; + +/** +* Calculate the benefit of building the cuboid, the already selected cuboids will affect the benefits of the given cuboid +* +*/ +public interface BenefitPolicy { + + public BenefitPolicy getInstance(); + + public void initBeforeStart(); + + public CuboidBenefitModel.BenefitModel calculateBenefit(long cuboid, Set<Long> selected); + + public CuboidBenefitModel.BenefitModel calculateBenefitTotal(List<Long> cuboidsToAdd, Set<Long> selected); + + public boolean ifEfficient(CuboidBenefitModel best); + + public void propagateAggregationCost(long cuboid, Set<Long> selected); +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java new file mode 100755 index 0000000..42f1ecf --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidBenefitModel.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +public class CuboidBenefitModel { + private CuboidModel cuboidModel; + private BenefitModel benefitModel; + + public CuboidBenefitModel(CuboidModel cuboidModel, BenefitModel benefitModel) { + this.cuboidModel = cuboidModel; + this.benefitModel = benefitModel; + } + + public void reset(CuboidModel cuboidModel, BenefitModel benefitModel) { + this.cuboidModel = cuboidModel; + this.benefitModel = benefitModel; + } + + public Long getCuboidId() { + return cuboidModel == null ? null : cuboidModel.getCuboidId(); + } + + public Double getBenefit() { + return benefitModel == null ? null : benefitModel.getBenefit(); + } + + @Override + public String toString() { + return "CuboidBenefitModel [cuboidModel=" + cuboidModel + ", benefitModel=" + benefitModel + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((cuboidModel == null) ? 0 : cuboidModel.hashCode()); + result = prime * result + ((benefitModel == null) ? 0 : benefitModel.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + CuboidBenefitModel other = (CuboidBenefitModel) obj; + if (cuboidModel == null) { + if (other.cuboidModel != null) + return false; + } else if (!cuboidModel.equals(other.cuboidModel)) + return false; + if (benefitModel == null) { + if (other.benefitModel != null) + return false; + } else if (!benefitModel.equals(other.benefitModel)) + return false; + return true; + } + + public static class CuboidModel { + private long cuboidId; + + private long recordCount; + private double spaceSize; + + private double hitProbability; + private long scanCount; + + public CuboidModel(long cuboId, long recordCount, double spaceSize, double hitProbability, long scanCount) { + this.cuboidId = cuboId; + this.recordCount = recordCount; + this.spaceSize = spaceSize; + this.hitProbability = hitProbability; + this.scanCount = scanCount; + } + + public long getCuboidId() { + return cuboidId; + } + + public long getRecordCount() { + return recordCount; + } + + public double getSpaceSize() { + return spaceSize; + } + + public double getHitProbability() { + return hitProbability; + } + + public long getScanCount() { + return scanCount; + } + + @Override + public String toString() { + return "CuboidModel [cuboidId=" + cuboidId + ", recordCount=" + recordCount + ", spaceSize=" + spaceSize + + ", hitProbability=" + hitProbability + ", scanCount=" + scanCount + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (int) (cuboidId ^ (cuboidId >>> 32)); + result = prime * result + (int) (recordCount ^ (recordCount >>> 32)); + long temp; + temp = Double.doubleToLongBits(spaceSize); + result = prime * result + (int) (temp ^ (temp >>> 32)); + temp = Double.doubleToLongBits(hitProbability); + result = prime * result + (int) (temp ^ (temp >>> 32)); + result = prime * result + (int) (scanCount ^ (scanCount >>> 32)); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + + CuboidModel other = (CuboidModel) obj; + if (cuboidId != other.cuboidId) + return false; + if (recordCount != other.recordCount) + return false; + if (Double.doubleToLongBits(spaceSize) != Double.doubleToLongBits(other.spaceSize)) + return false; + if (hitProbability != other.hitProbability) + return false; + if (scanCount != other.scanCount) + return false; + return true; + } + } + + public static class BenefitModel { + private double benefit; + private int benefitCount; + + public BenefitModel(double benefit, int benefitCount) { + this.benefit = benefit; + this.benefitCount = benefitCount; + } + + public double getBenefit() { + return benefit; + } + + public int getBenefitCount() { + return benefitCount; + } + + @Override + public String toString() { + return "BenefitModel [benefit=" + benefit + ", benefitCount=" + benefitCount + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + long temp; + temp = Double.doubleToLongBits(benefit); + result = prime * result + (int) (temp ^ (temp >>> 32)); + result = prime * result + benefitCount; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + BenefitModel other = (BenefitModel) obj; + if (Double.doubleToLongBits(benefit) != Double.doubleToLongBits(other.benefit)) + return false; + if (benefitCount != other.benefitCount) + return false; + return true; + } + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java new file mode 100755 index 0000000..b9f39ec --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommendAlgorithm.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.util.List; + +/** + * Algorithm to calculate the cuboid benifit and recommend cost-effective cuboid list based on the cube statistics. + */ +public interface CuboidRecommendAlgorithm { + + /** + * Return a list of recommended cuboids for the building segment based on expansionRate + * + */ + List<Long> recommend(double expansionRate); + + /** + * Start the Algorithm running + * + * @param maxSpaceLimit + * @return + */ + List<Long> start(double maxSpaceLimit); + + /** + * Cancel the Algorithm running + * + * Users can call this method from another thread to can the Algorithm and return the result earlier + * + */ + void cancel(); +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java new file mode 100755 index 0000000..1775d5a --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStats.java @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.util.Map; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +public class CuboidStats { + private static final Logger logger = LoggerFactory.getLogger(CuboidStats.class); + + public static class Builder { + + private static final long THRESHOLD_ROLL_UP_FOR_MANDATORY = 1000L; + + // Required parameters + private String key; + private Long baseCuboid; + private Map<Long, Long> statistics; + private Map<Long, Double> size; + + // Optional parameters - initialized to default values + private Set<Long> mandatoryCuboids = null; + //// These two properties are for generating mandatory cuboids + private Map<Long, Map<Long, Long>> rollingUpCountSourceMap = null; + private Long rollUpThresholdForMandatory = null; + + private Map<Long, Long> hitFrequencyMap = null; + private Map<Long, Map<Long, Long>> scanCountSourceMap = null; + + public Builder(String key, Long baseCuboid, Map<Long, Long> statistics, Map<Long, Double> size) { + this.key = key; + this.baseCuboid = baseCuboid; + this.statistics = statistics; + this.size = size; + } + + public Builder setRollingUpCountSourceMap(Map<Long, Map<Long, Long>> rollingUpCountSourceMap) { + this.rollingUpCountSourceMap = rollingUpCountSourceMap; + this.rollUpThresholdForMandatory = THRESHOLD_ROLL_UP_FOR_MANDATORY; + return this; + } + + public Builder setRollingUpCountSourceMap(Map<Long, Map<Long, Long>> rollingUpCountSourceMap, + long rollUpThresholdForMandatory) { + this.rollingUpCountSourceMap = rollingUpCountSourceMap; + this.rollUpThresholdForMandatory = rollUpThresholdForMandatory; + return this; + } + + public Builder setMandatoryCuboids(Set<Long> mandatoryCuboids) { + this.mandatoryCuboids = mandatoryCuboids; + return this; + } + + public Builder setHitFrequencyMap(Map<Long, Long> hitFrequencyMap) { + this.hitFrequencyMap = hitFrequencyMap; + return this; + } + + public Builder setScanCountSourceMap(Map<Long, Map<Long, Long>> scanCountSourceMap) { + this.scanCountSourceMap = scanCountSourceMap; + return this; + } + + public CuboidStats build() { + Preconditions.checkNotNull(key, "key should not be null"); + Preconditions.checkNotNull(baseCuboid, "baseCuboid should not be null"); + Preconditions.checkNotNull(statistics, "statistics should not be null"); + Preconditions.checkNotNull(size, "size should not be null"); + Preconditions.checkNotNull(statistics.get(baseCuboid), + "row count should exist for base cuboid " + baseCuboid); + Preconditions.checkState(statistics.keySet().equals(size.keySet()), + "statistics & size should own the same key set"); + if (mandatoryCuboids == null) { + mandatoryCuboids = Sets.newHashSet(); + } + if (rollingUpCountSourceMap != null) { + mandatoryCuboids.addAll(CuboidStatsUtil.generateMandatoryCuboidSet(statistics, hitFrequencyMap, + rollingUpCountSourceMap, rollUpThresholdForMandatory)); + } + + return new CuboidStats(key, baseCuboid, mandatoryCuboids, statistics, size, hitFrequencyMap, + scanCountSourceMap); + } + } + + private static final double WEIGHT_FOR_UN_QUERY = 0.2; + + private String key; + private long baseCuboid; + private ImmutableSet<Long> mandatoryCuboidSet; + private ImmutableSet<Long> selectionCuboidSet; + private ImmutableMap<Long, Long> cuboidCountMap; + private ImmutableMap<Long, Double> cuboidSizeMap; + private ImmutableMap<Long, Double> cuboidHitProbabilityMap; + private ImmutableMap<Long, Long> cuboidScanCountMap; + + private ImmutableMap<Long, Set<Long>> allDescendantsCache; + + private CuboidStats(String key, long baseCuboidId, Set<Long> mandatoryCuboids, Map<Long, Long> statistics, + Map<Long, Double> size, Map<Long, Long> hitFrequencyMap, Map<Long, Map<Long, Long>> scanCountSourceMap) { + + this.key = key; + this.baseCuboid = baseCuboidId; + /** Initial mandatory cuboids */ + Set<Long> cuboidsForMandatory = Sets.newHashSet(mandatoryCuboids); + //Always add base cuboid. + if (!cuboidsForMandatory.contains(baseCuboid)) { + cuboidsForMandatory.add(baseCuboid); + } + logger.info("Mandatory cuboids: " + cuboidsForMandatory); + + /** Initial selection cuboids */ + Set<Long> cuboidsForSelection = Sets.newHashSet(statistics.keySet()); + cuboidsForSelection.removeAll(cuboidsForMandatory); + + //There's no overlap between mandatoryCuboidSet and selectionCuboidSet + this.mandatoryCuboidSet = ImmutableSet.<Long> builder().addAll(cuboidsForMandatory).build(); + this.selectionCuboidSet = ImmutableSet.<Long> builder().addAll(cuboidsForSelection).build(); + if (selectionCuboidSet.isEmpty()) { + logger.warn("The selection set should not be empty!!!"); + } + + /** Initialize row count for mandatory cuboids */ + CuboidStatsUtil.complementRowCountForMandatoryCuboids(statistics, baseCuboid, mandatoryCuboidSet); + + this.cuboidCountMap = ImmutableMap.<Long, Long> builder().putAll(statistics).build(); + this.cuboidSizeMap = ImmutableMap.<Long, Double> builder().putAll(size).build(); + + /** Initialize the hit probability for each selection cuboid */ + Map<Long, Double> tmpCuboidHitProbabilityMap = Maps.newHashMapWithExpectedSize(selectionCuboidSet.size()); + if (hitFrequencyMap != null) { + long totalHitFrequency = 0L; + for (Map.Entry<Long, Long> hitFrequency : hitFrequencyMap.entrySet()) { + if (selectionCuboidSet.contains(hitFrequency.getKey())) { + totalHitFrequency += hitFrequency.getValue(); + } + } + + final double unitUncertainProb = WEIGHT_FOR_UN_QUERY / selectionCuboidSet.size(); + for (Long cuboid : selectionCuboidSet) { + //Calculate hit probability for each cuboid + if (hitFrequencyMap.get(cuboid) != null) { + tmpCuboidHitProbabilityMap.put(cuboid, unitUncertainProb + + (1 - WEIGHT_FOR_UN_QUERY) * hitFrequencyMap.get(cuboid) / totalHitFrequency); + } else { + tmpCuboidHitProbabilityMap.put(cuboid, unitUncertainProb); + } + } + } else { + for (Long cuboid : selectionCuboidSet) { + tmpCuboidHitProbabilityMap.put(cuboid, 1.0 / selectionCuboidSet.size()); + } + } + this.cuboidHitProbabilityMap = ImmutableMap.<Long, Double> builder().putAll(tmpCuboidHitProbabilityMap).build(); + + /** Initialize the scan count when query for each selection cuboid + one base cuboid */ + Map<Long, Long> tmpCuboidScanCountMap = Maps.newHashMapWithExpectedSize(1 + selectionCuboidSet.size()); + tmpCuboidScanCountMap.put(baseCuboid, getExpScanCount(baseCuboid, statistics, scanCountSourceMap)); + for (Long cuboid : selectionCuboidSet) { + tmpCuboidScanCountMap.put(cuboid, getExpScanCount(cuboid, statistics, scanCountSourceMap)); + } + this.cuboidScanCountMap = ImmutableMap.<Long, Long> builder().putAll(tmpCuboidScanCountMap).build(); + + this.allDescendantsCache = ImmutableMap.<Long, Set<Long>> builder() + .putAll(CuboidStatsUtil.createAllDescendantsCache(statistics.keySet())).build(); + } + + private long getExpScanCount(long sourceCuboid, Map<Long, Long> statistics, + Map<Long, Map<Long, Long>> scanCountSourceMap) { + Preconditions.checkNotNull(statistics.get(sourceCuboid), + "The statistics for source cuboid " + sourceCuboid + " does not exist!!!"); + if (scanCountSourceMap == null || scanCountSourceMap.get(sourceCuboid) == null + || scanCountSourceMap.get(sourceCuboid).size() <= 0) { + return statistics.get(sourceCuboid); + } else { + //TODO some improvement can be done by assigning weights based on distance between source cuboid and target cuboid + Map<Long, Long> scanCountTargetMap = scanCountSourceMap.get(sourceCuboid); + long totalEstScanCount = 0L; + for (Map.Entry<Long, Long> subEntry : scanCountTargetMap.entrySet()) { + long targetCuboid = subEntry.getKey(); + Preconditions.checkNotNull(statistics.get(targetCuboid), + "The statistics for target cuboid " + targetCuboid + " does not exist!!!"); + // Consider the ratio of row count between source cuboid and target cuboid + totalEstScanCount += subEntry.getValue() * statistics.get(sourceCuboid) / statistics.get(targetCuboid); + } + return totalEstScanCount / scanCountTargetMap.size(); + } + } + + public Set<Long> getAllDescendants(long cuboid) { + Set<Long> allDescendants = Sets.newLinkedHashSet(); + if (selectionCuboidSet.contains(cuboid)) { + return allDescendantsCache.get(cuboid); + } + return allDescendants; + } + + public Set<Long> getAllCuboidsForSelection() { + return selectionCuboidSet; + } + + public Set<Long> getAllCuboidsForMandatory() { + return mandatoryCuboidSet; + } + + public Long getCuboidQueryCost(long cuboid) { + return cuboidScanCountMap.get(cuboid); + } + + public Long getCuboidCount(long cuboid) { + return cuboidCountMap.get(cuboid); + } + + public Double getCuboidSize(long cuboid) { + return cuboidSizeMap.get(cuboid); + } + + public double getCuboidHitProbability(long cuboid) { + if (mandatoryCuboidSet.contains(cuboid)) { + return 1; + } else { + return cuboidHitProbabilityMap.get(cuboid) == null ? 0 : cuboidHitProbabilityMap.get(cuboid); + } + } + + public Map<Long, Long> getStatistics() { + return cuboidCountMap; + } + + public double getBaseCuboidSize() { + return getCuboidSize(baseCuboid); + } + + public long getBaseCuboid() { + return baseCuboid; + } + + public String getKey() { + return key; + } + + public CuboidBenefitModel.CuboidModel getCuboidModel(long cuboid) { + return new CuboidBenefitModel.CuboidModel(cuboid, getCuboidCount(cuboid), getCuboidSize(cuboid), + getCuboidHitProbability(cuboid), getCuboidQueryCost(cuboid)); + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java new file mode 100644 index 0000000..6d5bbe5 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtil.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +public class CuboidStatsUtil { + + /** + * For generating mandatory cuboids, + * a cuboid is mandatory if the expectation of rolling up count exceeds a threshold + * */ + public static Set<Long> generateMandatoryCuboidSet(Map<Long, Long> statistics, Map<Long, Long> hitFrequencyMap, + Map<Long, Map<Long, Long>> rollingUpCountSourceMap, final long rollUpThresholdForMandatory) { + Set<Long> mandatoryCuboidSet = Sets.newHashSet(); + if (hitFrequencyMap == null || hitFrequencyMap.isEmpty() || rollingUpCountSourceMap == null + || rollingUpCountSourceMap.isEmpty()) { + return mandatoryCuboidSet; + } + long totalHitFrequency = 0L; + for (long hitFrequency : hitFrequencyMap.values()) { + totalHitFrequency += hitFrequency; + } + + if (totalHitFrequency == 0) { + return mandatoryCuboidSet; + } + + for (Map.Entry<Long, Long> hitFrequency : hitFrequencyMap.entrySet()) { + long cuboid = hitFrequency.getKey(); + if (statistics.get(cuboid) != null) { + continue; + } + if (rollingUpCountSourceMap.get(cuboid) == null || rollingUpCountSourceMap.get(cuboid).isEmpty()) { + continue; + } + long totalEstScanCount = 0L; + for (long estScanCount : rollingUpCountSourceMap.get(cuboid).values()) { + totalEstScanCount += estScanCount; + } + totalEstScanCount /= rollingUpCountSourceMap.get(cuboid).size(); + if ((hitFrequency.getValue() * 1.0 / totalHitFrequency) + * totalEstScanCount >= rollUpThresholdForMandatory) { + mandatoryCuboidSet.add(cuboid); + } + } + return mandatoryCuboidSet; + } + + /** + * Complement row count for mandatory cuboids + * with its best parent's row count + * */ + public static void complementRowCountForMandatoryCuboids(Map<Long, Long> statistics, long baseCuboid, + Set<Long> mandatoryCuboidSet) { + // Sort entries order by row count asc + SortedSet<Map.Entry<Long, Long>> sortedStatsSet = new TreeSet<Map.Entry<Long, Long>>( + new Comparator<Map.Entry<Long, Long>>() { + public int compare(Map.Entry<Long, Long> o1, Map.Entry<Long, Long> o2) { + return o1.getValue().compareTo(o2.getValue()); + } + }); + sortedStatsSet.addAll(statistics.entrySet()); + for (Long cuboid : mandatoryCuboidSet) { + if (statistics.get(cuboid) == null) { + // Get estimate row count for mandatory cuboid + long tmpRowCount = -1; + for (Map.Entry<Long, Long> entry : sortedStatsSet) { + if (isDescendant(cuboid, entry.getKey())) { + tmpRowCount = entry.getValue(); + } + } + statistics.put(cuboid, tmpRowCount < 0 ? statistics.get(baseCuboid) : tmpRowCount); + } + } + } + + /** Using dynamic programming to use extra space to reduce repetitive computation*/ + public static Map<Long, Set<Long>> createAllDescendantsCache(final Set<Long> cuboidSet) { + List<Long> latticeCuboidList = Lists.newArrayList(cuboidSet); + Collections.sort(latticeCuboidList); + + Map<Long, Set<Long>> allDescendantsCache = Maps.newHashMap(); + Set<Long> preNoneDescendants = Sets.newHashSet(); + for (int i = 0; i < latticeCuboidList.size(); i++) { + Long currentCuboid = latticeCuboidList.get(i); + Set<Long> descendants = Sets.newHashSet(currentCuboid); + Set<Long> curNoneDescendants = Sets.newHashSet(); + if (i > 0) { + long preCuboid = latticeCuboidList.get(i - 1); + if (isDescendant(preCuboid, currentCuboid)) { + descendants.addAll(allDescendantsCache.get(preCuboid)); + } else { + curNoneDescendants.add(preCuboid); + for (long cuboidToCheck : allDescendantsCache.get(preCuboid)) { + if (isDescendant(cuboidToCheck, currentCuboid)) { + descendants.addAll(allDescendantsCache.get(cuboidToCheck)); + } + } + } + } + for (long cuboidToCheck : preNoneDescendants) { + if (isDescendant(cuboidToCheck, currentCuboid)) { + descendants.addAll(allDescendantsCache.get(cuboidToCheck)); + } else { + curNoneDescendants.add(cuboidToCheck); + } + } + + allDescendantsCache.put(currentCuboid, descendants); + preNoneDescendants = curNoneDescendants; + } + + return allDescendantsCache; + } + + @VisibleForTesting + static Map<Long, Set<Long>> createAllDescendantsCache2(final Set<Long> cuboidSet) { + List<Long> latticeCuboidList = Lists.newArrayList(cuboidSet); + + Map<Long, Set<Long>> allDescendantsCache = Maps.newHashMap(); + for (int i = 0; i < latticeCuboidList.size(); i++) { + Long currentCuboid = latticeCuboidList.get(i); + Set<Long> descendantSet = Sets.newHashSet(currentCuboid); + for (int j = 0; j < i; j++) { + Long checkCuboid = latticeCuboidList.get(j); + if (isDescendant(checkCuboid, currentCuboid)) { + descendantSet.add(checkCuboid); + } + } + allDescendantsCache.put(currentCuboid, descendantSet); + } + return allDescendantsCache; + } + + public static boolean isDescendant(long cuboidToCheck, long parentCuboid) { + return (cuboidToCheck & parentCuboid) == cuboidToCheck; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java new file mode 100755 index 0000000..6d3ddc7 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/PBPUSCalculator.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import com.google.common.base.Preconditions; + +/** + * Calculate the benefit based on Benefit Per Unit Space with query probability distribution. + */ +public class PBPUSCalculator extends BPUSCalculator { + + public PBPUSCalculator(final CuboidStats cuboidStats) { + super(cuboidStats); + } + + @Override + protected double getCostSaving(long descendant, long cuboid) { + return getCuboidHitProbability(descendant) * super.getCostSaving(descendant, cuboid); + } + + protected double getCuboidHitProbability(long cuboid) { + return cuboidStats.getCuboidHitProbability(cuboid); + } + + public double getMinBenefitRatio() { + Preconditions.checkArgument(cuboidStats.getAllCuboidsForSelection().size() > 0, + "The set of cuboids for selection is empty!!!"); + return super.getMinBenefitRatio() / cuboidStats.getAllCuboidsForSelection().size(); + } + + @Override + public BenefitPolicy getInstance() { + PBPUSCalculator pbpusCalculator = new PBPUSCalculator(cuboidStats); + pbpusCalculator.cuboidAggCostMap.putAll(this.cuboidAggCostMap); + return pbpusCalculator; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java new file mode 100755 index 0000000..c89bf49 --- /dev/null +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/SPBPUSCalculator.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +/** + * Calculate the benefit based on Benefit Per Unit Space with query probability distribution and updated cost. + */ +public class SPBPUSCalculator extends PBPUSCalculator { + + public SPBPUSCalculator(final CuboidStats cuboidStats) { + super(cuboidStats); + } + + @Override + protected Long getCuboidCost(long cuboid) { + return cuboidStats.getCuboidQueryCost(cuboid); + } + + @Override + public BenefitPolicy getInstance() { + SPBPUSCalculator spbpusCalculator = new SPBPUSCalculator(cuboidStats); + spbpusCalculator.cuboidAggCostMap.putAll(this.cuboidAggCostMap); + return spbpusCalculator; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java new file mode 100755 index 0000000..02927c3 --- /dev/null +++ b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/AlgorithmTestBase.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.kylin.cube.cuboid.TreeCuboidScheduler.CuboidCostComparator; +import org.apache.kylin.cube.cuboid.TreeCuboidScheduler.CuboidTree; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +public class AlgorithmTestBase { + + public CuboidStats cuboidStats; + + private Set<Long> mandatoryCuboids; + + @Before + public void setUp() throws Exception { + + mandatoryCuboids = Sets.newHashSet(); + mandatoryCuboids.add(3000L); + mandatoryCuboids.add(1888L); + mandatoryCuboids.add(88L); + cuboidStats = new CuboidStats.Builder("test", 4095L, simulateCount(), simulateSpaceSize()) + .setMandatoryCuboids(mandatoryCuboids).setHitFrequencyMap(simulateHitFrequency()) + .setScanCountSourceMap(simulateScanCount()).build(); + } + + @After + public void after() throws Exception { + } + + @Test + public void testMandatoryRowCountEstimation() { + for (long mandatoryCuboid : mandatoryCuboids) { + System.out.println("Cuboid id: " + mandatoryCuboid + "; Row Count: " + + cuboidStats.getStatistics().get(mandatoryCuboid)); + } + } + + /** better if closer to 1, worse if closer to 0*/ + public double getQueryCostRatio(CuboidStats cuboidStats, List<Long> recommendList) { + CuboidTree cuboidTree = CuboidTree.createFromCuboids(recommendList, + new CuboidCostComparator(cuboidStats.getStatistics())); + double queryCostBest = 0; + for (Long cuboidId : cuboidStats.getAllCuboidsForSelection()) { + if (cuboidStats.getCuboidQueryCost(cuboidId) != null) { + queryCostBest += cuboidStats.getCuboidHitProbability(cuboidId) * cuboidStats.getCuboidCount(cuboidId); + // queryCostBest += cuboidStats.getCuboidHitProbability(cuboidId) * cuboidStats.getCuboidQueryCost(cuboidId); + } + } + + double queryCost = 0; + for (Long cuboidId : cuboidStats.getAllCuboidsForSelection()) { + long matchCuboidId = cuboidTree.findBestMatchCuboid(cuboidId); + if (cuboidStats.getCuboidQueryCost(matchCuboidId) != null) { + queryCost += cuboidStats.getCuboidHitProbability(cuboidId) * cuboidStats.getCuboidCount(matchCuboidId); + // queryCost += cuboidStats.getCuboidHitProbability(cuboidId) * cuboidStats.getCuboidQueryCost(matchCuboidId); + } + } + + return queryCostBest / queryCost; + } + + protected Map<Long, Long> simulateCount() { + Map<Long, Long> countMap = Maps.newHashMap(); + BufferedReader br = null; + + try { + + String sCurrentLine; + + br = new BufferedReader(new FileReader("src/test/resources/statistics.txt")); + + while ((sCurrentLine = br.readLine()) != null) { + String[] statPair = sCurrentLine.split(" "); + countMap.put(Long.valueOf(statPair[0]), Long.valueOf(statPair[1])); + } + + } catch (IOException e) { + e.printStackTrace(); + } finally { + try { + if (br != null) + br.close(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + return countMap; + } + + protected Map<Long, Double> simulateSpaceSize() { + Map<Long, Double> sizeMap = Maps.newHashMap(); + Map<Long, Long> countMap = simulateCount(); + for (Map.Entry<Long, Long> entry : countMap.entrySet()) { + sizeMap.put(entry.getKey(), entry.getValue() * 1.0); + } + return sizeMap; + } + + protected Map<Long, Long> simulateHitFrequency() { + Map<Long, Long> hitFrequencyMap = Maps.newHashMap(); + + hitFrequencyMap.put(4095L, 10L); + hitFrequencyMap.put(3849L, 15L); + hitFrequencyMap.put(3780L, 31L); + + hitFrequencyMap.put(3459L, 16L); + hitFrequencyMap.put(3145L, 29L); + + hitFrequencyMap.put(2861L, 21L); + hitFrequencyMap.put(2768L, 40L); + + hitFrequencyMap.put(1528L, 10L); + hitFrequencyMap.put(1440L, 9L); + hitFrequencyMap.put(1152L, 21L); + + hitFrequencyMap.put(256L, 23L); + + hitFrequencyMap.put(128L, 7L); + hitFrequencyMap.put(272L, 8L); + hitFrequencyMap.put(288L, 10L); + hitFrequencyMap.put(384L, 2L); + hitFrequencyMap.put(320L, 3L); + hitFrequencyMap.put(432L, 5L); + hitFrequencyMap.put(258L, 8L); + hitFrequencyMap.put(336L, 10L); + hitFrequencyMap.put(274L, 22L); + hitFrequencyMap.put(488L, 41L); + hitFrequencyMap.put(352L, 10L); + + hitFrequencyMap.put(16L, 1L); + hitFrequencyMap.put(32L, 5L); + hitFrequencyMap.put(34L, 1L); + + hitFrequencyMap.put(2L, 21L); + + return hitFrequencyMap; + } + + protected Map<Long, Map<Long, Long>> simulateScanCount() { + Map<Long, Map<Long, Long>> scanCountMap = Maps.newLinkedHashMap(); + scanCountMap.put(4094L, new HashMap<Long, Long>() { + { + put(4095L, 1833041L); + } + }); + scanCountMap.put(3849L, new HashMap<Long, Long>() { + { + put(3849L, 276711L); + } + }); + scanCountMap.put(3780L, new HashMap<Long, Long>() { + { + put(3780L, 129199L); + } + }); + scanCountMap.put(3459L, new HashMap<Long, Long>() { + { + put(3459L, 168109L); + } + }); + scanCountMap.put(3145L, new HashMap<Long, Long>() { + { + put(3145L, 299991L); + } + }); + scanCountMap.put(2861L, new HashMap<Long, Long>() { + { + put(2861L, 2121L); + } + }); + scanCountMap.put(2768L, new HashMap<Long, Long>() { + { + put(2768L, 40231L); + } + }); + scanCountMap.put(256L, new HashMap<Long, Long>() { + { + put(256L, 1L); + } + }); + scanCountMap.put(16L, new HashMap<Long, Long>() { + { + put(16L, 1L); + } + }); + scanCountMap.put(32L, new HashMap<Long, Long>() { + { + put(32L, 2L); + } + }); + scanCountMap.put(128L, new HashMap<Long, Long>() { + { + put(128L, 3L); + } + }); + scanCountMap.put(272L, new HashMap<Long, Long>() { + { + put(272L, 2L); + } + }); + scanCountMap.put(288L, new HashMap<Long, Long>() { + { + put(288L, 3L); + } + }); + scanCountMap.put(2L, new HashMap<Long, Long>() { + { + put(2L, 1L); + } + }); + scanCountMap.put(384L, new HashMap<Long, Long>() { + { + put(384L, 2L); + } + }); + scanCountMap.put(320L, new HashMap<Long, Long>() { + { + put(320L, 3L); + } + }); + scanCountMap.put(432L, new HashMap<Long, Long>() { + { + put(432L, 5L); + } + }); + scanCountMap.put(1152L, new HashMap<Long, Long>() { + { + put(1152L, 21L); + } + }); + scanCountMap.put(258L, new HashMap<Long, Long>() { + { + put(258L, 2L); + } + }); + scanCountMap.put(1440L, new HashMap<Long, Long>() { + { + put(1440L, 9L); + } + }); + scanCountMap.put(336L, new HashMap<Long, Long>() { + { + put(336L, 2L); + } + }); + scanCountMap.put(336L, new HashMap<Long, Long>() { + { + put(336L, 2L); + } + }); + scanCountMap.put(274L, new HashMap<Long, Long>() { + { + put(274L, 1L); + } + }); + scanCountMap.put(488L, new HashMap<Long, Long>() { + { + put(488L, 16L); + } + }); + scanCountMap.put(352L, new HashMap<Long, Long>() { + { + put(352L, 3L); + } + }); + scanCountMap.put(1528L, new HashMap<Long, Long>() { + { + put(1528L, 21L); + } + }); + scanCountMap.put(34L, new HashMap<Long, Long>() { + { + put(34L, 1L); + } + }); + + return scanCountMap; + } +} http://git-wip-us.apache.org/repos/asf/kylin/blob/e2029dd9/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java new file mode 100644 index 0000000..ca70820 --- /dev/null +++ b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube.cuboid.algorithm; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.base.Stopwatch; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +public class CuboidStatsUtilTest { + + /** + * (11111111) + * / | \ + * 10011111 (11101111) 00110010 + * \ | / + * \ 11000111 / + * \ | / + * 00000110 / + * / | / + * 00000100 00000010 + * */ + private Set<Long> generateCuboidSet() { + return Sets.newHashSet(255L, 159L, 239L, 50L, 199L, 6L, 4L, 2L); + } + + private Map<Long, Long> simulateCount() { + Map<Long, Long> countMap = Maps.newHashMap(); + + countMap.put(255L, 10000L); + countMap.put(159L, 10000L); + countMap.put(50L, 10000L); + countMap.put(199L, 10000L); + countMap.put(6L, 10000L); + countMap.put(4L, 10000L); + countMap.put(2L, 10000L); + + return countMap; + } + + private Map<Long, Long> simulateHitFrequency() { + Map<Long, Long> hitFrequencyMap = Maps.newHashMap(); + + long totalHitFrequency = 10000L; + + hitFrequencyMap.put(239L, (long) (totalHitFrequency * 0.5)); + hitFrequencyMap.put(50L, (long) (totalHitFrequency * 0.2)); + hitFrequencyMap.put(2L, (long) (totalHitFrequency * 0.25)); + hitFrequencyMap.put(178L, (long) (totalHitFrequency * 0.05)); + + return hitFrequencyMap; + } + + private Map<Long, Map<Long, Long>> simulateRollingUpCount() { + Map<Long, Map<Long, Long>> rollingUpCountMap = Maps.newLinkedHashMap(); + + rollingUpCountMap.put(239L, new HashMap<Long, Long>() { + { + put(255L, 4000L); + } + }); + + rollingUpCountMap.put(178L, new HashMap<Long, Long>() { + { + put(255L, 5000L); + } + }); + + return rollingUpCountMap; + } + + @Test + public void isDescendantTest() { + Assert.assertTrue(CuboidStatsUtil.isDescendant(6L, 239L)); + Assert.assertTrue(!CuboidStatsUtil.isDescendant(4L, 50L)); + } + + @Test + public void generateMandatoryCuboidSetTest() { + Set<Long> mandatoryCuboidSet = CuboidStatsUtil.generateMandatoryCuboidSet(simulateCount(), + simulateHitFrequency(), simulateRollingUpCount(), 1000L); + Assert.assertTrue(mandatoryCuboidSet.contains(239L)); + Assert.assertTrue(!mandatoryCuboidSet.contains(178L)); + } + + @Test + public void complementRowCountForMandatoryCuboidsTest() { + Map<Long, Long> countMap = simulateCount(); + Set<Long> mandatoryCuboidSet = CuboidStatsUtil.generateMandatoryCuboidSet(countMap, simulateHitFrequency(), + simulateRollingUpCount(), 1000L); + for (long mandatoryCuboid : mandatoryCuboidSet) { + Assert.assertNull(countMap.get(mandatoryCuboid)); + } + CuboidStatsUtil.complementRowCountForMandatoryCuboids(countMap, 255L, mandatoryCuboidSet); + for (long mandatoryCuboid : mandatoryCuboidSet) { + Assert.assertNotNull(countMap.get(mandatoryCuboid)); + } + Assert.assertTrue(countMap.get(239L) == 10000L); + } + + @Test + public void createAllDescendantsCacheTest() { + Set<Long> cuboidSet = generateCuboidSet(); + Map<Long, Set<Long>> allDescendantsCache = CuboidStatsUtil.createAllDescendantsCache(cuboidSet); + + Assert.assertTrue(allDescendantsCache.get(255L).containsAll(cuboidSet)); + + Assert.assertTrue(allDescendantsCache.get(239L).size() == 5); + + Assert.assertTrue(allDescendantsCache.get(50L).containsAll(Sets.newHashSet(50L, 2L))); + Assert.assertTrue(!allDescendantsCache.get(50L).contains(4L)); + } + + private Set<Long> generateMassCuboidSet() { + Set<Long> cuboidSet = Sets.newHashSet(); + long maxCuboid = (1L << 16); + for (long i = 1; i < maxCuboid; i++) { + cuboidSet.add(i); + } + return cuboidSet; + } + + @Test + public void createAllDescendantsCacheStressTest() { + Stopwatch sw = new Stopwatch(); + sw.start(); + Set<Long> cuboidSet = generateMassCuboidSet(); + System.out.println("Time elapsed for creating sorted cuboid list: " + sw.elapsed(TimeUnit.MILLISECONDS)); + sw.reset(); + sw.start(); + CuboidStatsUtil.createAllDescendantsCache(cuboidSet); + System.out.println("Time elapsed for creating descendants cache: " + sw.elapsed(TimeUnit.MILLISECONDS)); + sw.reset(); + sw.start(); + CuboidStatsUtil.createAllDescendantsCache2(cuboidSet); + System.out.println("Time elapsed for creating descendants cache2: " + sw.elapsed(TimeUnit.MILLISECONDS)); + } +}