KYLIN-2728 Introduce a new cuboid scheduler based on cuboid tree rather than static rules
Signed-off-by: Li Yang <liy...@apache.org>

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/0eded0ad
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/0eded0ad
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/0eded0ad

Branch: refs/heads/ranger
Commit: 0eded0ad38a4a4b59f23e6cd5749fdb04b7c627f
Parents: 465c507
Author: Zhong <nju_y...@apache.org>
Authored: Thu Aug 17 20:40:35 2017 +0800
Committer: Li Yang <liy...@apache.org>
Committed: Sun Sep 10 07:47:20 2017 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/util/JsonUtil.java | 4 +
 .../org/apache/kylin/cube/CubeInstance.java | 136 ++++++++++++++++-
 .../kylin/cube/cuboid/CuboidModeEnum.java | 48 ++++++
 .../cube/cuboid/TreeCuboidSchedulerTest.java | 148 +++++++++++++++++++
 4 files changed, 333 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java b/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
index 4f3086a..5a81463 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
@@ -72,6 +72,10 @@ public class JsonUtil {
         return mapper.readValue(src, valueType);
     }

+    public static <T> T readValue(String content, TypeReference<T> valueTypeRef) throws IOException {
+        return mapper.readValue(content, valueTypeRef); // plain delegate; the TypeReference overload lets callers name a generic target such as Map<Long, Long>
+    }
+
     public static Map<String, String> readValueAsMap(String content) throws IOException {
         TypeReference<HashMap<String, String>> typeRef = new TypeReference<HashMap<String, String>>() {
         };
http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java index 246cbf6..fa270fd 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java @@ -18,15 +18,24 @@ package org.apache.kylin.cube; +import static org.apache.kylin.cube.cuboid.CuboidModeEnum.CURRENT; +import static org.apache.kylin.cube.cuboid.CuboidModeEnum.RECOMMEND; + import java.io.IOException; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.KylinConfigExt; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; +import org.apache.kylin.common.util.CompressionUtils; +import org.apache.kylin.common.util.JsonUtil; +import org.apache.kylin.cube.cuboid.CuboidModeEnum; import org.apache.kylin.cube.cuboid.CuboidScheduler; +import org.apache.kylin.cube.cuboid.TreeCuboidScheduler; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.metadata.model.ColumnDesc; import org.apache.kylin.metadata.model.DataModelDesc; @@ -51,6 +60,7 @@ import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonManagedReference; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Objects; import com.google.common.collect.Lists; @@ -96,6 +106,15 @@ public class CubeInstance extends RootPersistentEntity implements IRealization, @JsonProperty("create_time_utc") private long createTimeUTC; 
+    @JsonProperty("cuboid_bytes") // snake_case, matching the sibling keys (create_time_utc, cuboid_bytes_recommend, last_optimized)
+    private byte[] cuboidBytes; // compressed UTF-8 JSON of a Map<Long, Long>; decoded by getCuboids()
+
+    @JsonProperty("cuboid_bytes_recommend")
+    private byte[] cuboidBytesRecommend; // compressed UTF-8 JSON of a Set<Long>; decoded by getCuboidsRecommend()
+
+    @JsonProperty("last_optimized")
+    private long lastOptimized;
+
     // cuboid scheduler lazy built
     transient private CuboidScheduler cuboidScheduler;

@@ -109,7 +128,14 @@ public class CubeInstance extends RootPersistentEntity implements IRealization,

         synchronized (this) {
             if (cuboidScheduler == null) {
-                cuboidScheduler = getDescriptor().getInitialCuboidScheduler();
+                Map<Long, Long> cuboidsWithRowCnt = getCuboids();
+                if (cuboidsWithRowCnt == null) { // no stored cuboid map: keep using the descriptor's initial scheduler
+                    cuboidScheduler = getDescriptor().getInitialCuboidScheduler();
+                } else {
+                    cuboidScheduler = new TreeCuboidScheduler(getDescriptor(),
+                            Lists.newArrayList(cuboidsWithRowCnt.keySet()),
+                            new TreeCuboidScheduler.CuboidCostComparator(cuboidsWithRowCnt));
+                }
             }
         }
         return cuboidScheduler;
@@ -145,7 +171,8 @@ public class CubeInstance extends RootPersistentEntity implements IRealization,
     // in a temporary broken state, so that user can edit and fix it. Broken state is often due to
     // schema changes at source.
     public boolean allowBrokenDescriptor() {
-        return (getStatus() == RealizationStatusEnum.DISABLED || getStatus() == RealizationStatusEnum.DESCBROKEN) && segments.isEmpty();
+        return (getStatus() == RealizationStatusEnum.DISABLED || getStatus() == RealizationStatusEnum.DESCBROKEN)
+                && segments.isEmpty();
     }

     public String getResourcePath() {
@@ -305,6 +332,108 @@ public class CubeInstance extends RootPersistentEntity implements IRealization,
         this.createTimeUTC = createTimeUTC;
     }

+    public Set<Long> getCuboidsByMode(String cuboidModeName) {
+        return getCuboidsByMode(cuboidModeName == null ? null : CuboidModeEnum.getByModeName(cuboidModeName));
+    }
+
+    public Set<Long> getCuboidsByMode(CuboidModeEnum cuboidMode) {
+        if (cuboidMode == null || cuboidMode == CURRENT) { // a null mode is handled exactly like CURRENT
+            return getCuboidScheduler().getAllCuboidIds();
+        }
+        Set<Long> cuboidsRecommend = getCuboidsRecommend(); // freshly deserialized on every call, so it is safe to mutate below
+        if (cuboidsRecommend == null || cuboidMode == RECOMMEND) { // may return null when no recommendation is stored
+            return cuboidsRecommend;
+        }
+        Set<Long> currentCuboids = getCuboidScheduler().getAllCuboidIds(); // handed out by the scheduler: read-only use here
+        switch (cuboidMode) {
+        case RECOMMEND_EXISTING:
+            cuboidsRecommend.retainAll(currentCuboids);
+            return cuboidsRecommend;
+        case RECOMMEND_MISSING:
+            cuboidsRecommend.removeAll(currentCuboids);
+            return cuboidsRecommend;
+        case RECOMMEND_MISSING_WITH_BASE:
+            cuboidsRecommend.removeAll(currentCuboids);
+            cuboidsRecommend.add(getCuboidScheduler().getBaseCuboidId()); // fix: base cuboid goes into the RETURNED set, not into currentCuboids
+            return cuboidsRecommend;
+        default:
+            return null;
+        }
+    }
+
+    public Map<Long, Long> getCuboids() {
+        if (cuboidBytes == null)
+            return null;
+        byte[] uncompressed;
+        try {
+            uncompressed = CompressionUtils.decompress(cuboidBytes);
+            String str = new String(uncompressed, "UTF-8");
+            TypeReference<Map<Long, Long>> typeRef = new TypeReference<Map<Long, Long>>() {
+            };
+            Map<Long, Long> cuboids = JsonUtil.readValue(str, typeRef);
+            return cuboids.isEmpty() ? null : cuboids; // an empty map is reported the same way as "nothing stored"
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public void setCuboids(Map<Long, Long> cuboids) {
+        if (cuboids == null) // null leaves the stored bytes untouched
+            return;
+        if (cuboids.isEmpty()) { // an empty map clears the stored bytes
+            cuboidBytes = null;
+            return;
+        }
+
+        try {
+            String str = JsonUtil.writeValueAsString(cuboids);
+            byte[] compressed = CompressionUtils.compress(str.getBytes("UTF-8"));
+            cuboidBytes = compressed;
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public Set<Long> getCuboidsRecommend() {
+        if (cuboidBytesRecommend == null)
+            return null;
+        byte[] uncompressed;
+        try {
+            uncompressed = CompressionUtils.decompress(cuboidBytesRecommend);
+            String str = new String(uncompressed, "UTF-8");
+            TypeReference<Set<Long>> typeRef = new TypeReference<Set<Long>>() {
+            };
+            Set<Long> cuboids = JsonUtil.readValue(str, typeRef);
+            return cuboids.isEmpty() ? null : cuboids; // an empty set is reported the same way as "nothing stored"
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public void setCuboidsRecommend(HashSet<Long> cuboids) {
+        if (cuboids == null) // null leaves the stored bytes untouched
+            return;
+        if (cuboids.isEmpty()) { // an empty set clears the stored bytes
+            cuboidBytesRecommend = null;
+            return;
+        }
+        try {
+            String str = JsonUtil.writeValueAsString(cuboids);
+            byte[] compressed = CompressionUtils.compress(str.getBytes("UTF-8"));
+            cuboidBytesRecommend = compressed;
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Get cuboid level count except base cuboid
+     * @return size of getCuboidScheduler().getCuboidsByLayer() minus one
+     */
+    public int getBuildLevel() {
+        return getCuboidScheduler().getCuboidsByLayer().size() - 1;
+    }
+
     @Override
     public CapabilityResult isCapable(SQLDigest digest) {
         CapabilityResult result = CubeCapabilityChecker.check(this, digest);
@@ -379,7 +508,8 @@ public class CubeInstance extends RootPersistentEntity implements IRealization,
         if (!this.getDescriptor().getModel().getPartitionDesc().isPartitioned())
             return false;

-        return this.getDescriptor().getAutoMergeTimeRanges() != null && this.getDescriptor().getAutoMergeTimeRanges().length > 0;
+        return this.getDescriptor().getAutoMergeTimeRanges() != null
+                && this.getDescriptor().getAutoMergeTimeRanges().length > 0;
     }

     public SegmentRange autoMergeCubeSegments() throws IOException {

http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java
new file mode 100644
index 0000000..f55c9db
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid;
+
+import com.google.common.base.Strings;
+
+public enum CuboidModeEnum {
+    CURRENT("CURRENT"), RECOMMEND("RECOMMEND"), RECOMMEND_EXISTING("RECOMMEND_EXISTING"), RECOMMEND_MISSING(
+            "RECOMMEND_MISSING"), RECOMMEND_MISSING_WITH_BASE("RECOMMEND_MISSING_WITH_BASE");
+
+    private final String modeName; // upper-case external name; also what toString() returns
+
+    CuboidModeEnum(String modeName) {
+        this.modeName = modeName;
+    }
+
+    public String toString() {
+        return modeName;
+    }
+
+    public static CuboidModeEnum getByModeName(String modeName) { // case-insensitive lookup; null when nothing matches
+        if (Strings.isNullOrEmpty(modeName)) {
+            return null;
+        }
+        for (CuboidModeEnum mode : CuboidModeEnum.values()) {
+            if (mode.modeName.equalsIgnoreCase(modeName)) { // fix: locale-independent, unlike toUpperCase() (Turkish 'i' -> dotted capital I broke the match)
+                return mode;
+            }
+        }
+        return null; // unknown name: callers must handle null
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java
new file mode 100644
index 0000000..41fa807
--- /dev/null
+++ b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.kylin.cube.cuboid.TreeCuboidScheduler.CuboidTree;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+
+public class TreeCuboidSchedulerTest {
+
+    @Test
+    public void testCreateCuboidTree() { // smoke test: builds and prints a tree, asserts nothing
+        long basicCuboid = getBaseCuboid(10);
+        List<Long> cuboids = genRandomCuboids(basicCuboid, 200);
+        CuboidTree cuboidTree = CuboidTree.createFromCuboids(cuboids);
+        PrintWriter out = new PrintWriter(System.out);
+        cuboidTree.print(out);
+        out.flush();
+    }
+
+    @Test
+    public void testSpanningChild() { // smoke test: prints the spanning cuboids of one sample, asserts nothing
+        long basicCuboid = getBaseCuboid(10);
+        List<Long> cuboids = genRandomCuboids(basicCuboid, 50);
+        long testCuboid = cuboids.get(10);
+        System.out.println(cuboids);
+        CuboidTree cuboidTree = CuboidTree.createFromCuboids(cuboids);
+        PrintWriter out = new PrintWriter(System.out);
+        cuboidTree.print(out);
+        out.flush();
+
+        List<Long> spanningChildren = cuboidTree.getSpanningCuboid(testCuboid);
+        System.out.println(testCuboid + ":" + spanningChildren);
+    }
+
+    @Test
+    public void testFindBestMatchCuboid() {
+        CuboidTree cuboidTree = createCuboidTree1();
+        PrintWriter out = new PrintWriter(System.out);
+        cuboidTree.print(out);
+        out.flush();
+
+        assertEquals(503L, cuboidTree.findBestMatchCuboid(503L));
+
+        long bestMatch1 = cuboidTree.findBestMatchCuboid(Long.parseLong("100000000", 2));
+        assertEquals(263, bestMatch1);
+
+        long bestMatch2 = cuboidTree.findBestMatchCuboid(Long.parseLong("100010000", 2));
+        assertEquals(304, bestMatch2);
+    }
+
+    private List<Long> genRandomCuboids(long basicCuboidId, int count) {
+        Random random = new Random(); // unseeded: every run draws a different cuboid set
+        List<Long> result = new ArrayList<>();
+        result.add(basicCuboidId);
+        for (int i = 0; i < count; i++) {
+            result.add(random.nextLong() & basicCuboidId); // masking keeps only bits that are set in the base cuboid
+        }
+        return result;
+    }
+
+    private long getBaseCuboid(int dimensionCnt) {
+        if (dimensionCnt > 64) {
+            throw new IllegalArgumentException("the dimension count cannot exceed 64");
+        }
+        long result = 0;
+        for (int i = 0; i < dimensionCnt; i++) {
+            result |= (1L << i); // fix: long shift; the int form `1 << i` sign-extends at i == 31 and wraps for i >= 32
+        }
+        return result;
+    }
+
+    private CuboidTree createCuboidTree1() {
+        List<Long> cuboids = Arrays.asList(504L, 511L, 447L, 383L, 503L, 440L, 496L, 376L, 439L, 487L, 375L, 319L, 432L,
+                480L, 368L, 312L, 423L, 455L, 311L, 359L, 416L, 448L, 304L, 352L, 391L, 295L, 327L, 384L, 288L, 320L,
+                263L);
+        return CuboidTree.createFromCuboids(cuboids,
+                new TreeCuboidScheduler.CuboidCostComparator(simulateStatistics()));
+    }
+
+    private Map<Long, Long> simulateStatistics() { // one entry per cuboid id listed in createCuboidTree1()
+        Map<Long, Long> countMap = Maps.newHashMap();
+        countMap.put(511L, 1000000L);
+
+        countMap.put(504L, 900000L);
+        countMap.put(447L, 990000L);
+        countMap.put(383L, 991000L);
+        countMap.put(503L, 980000L);
+
+        countMap.put(440L, 800000L);
+        countMap.put(496L, 890000L);
+        countMap.put(376L, 891000L);
+        countMap.put(439L, 751000L);
+        countMap.put(487L, 751000L);
+        countMap.put(375L, 741000L);
+        countMap.put(319L, 740000L);
+
+        countMap.put(432L, 600000L);
+        countMap.put(480L, 690000L);
+        countMap.put(368L, 691000L);
+        countMap.put(312L, 651000L);
+        countMap.put(423L, 651000L);
+        countMap.put(455L, 541000L);
+        countMap.put(311L, 540000L);
+        countMap.put(359L, 530000L);
+
+        countMap.put(416L, 400000L);
+        countMap.put(448L, 490000L);
+        countMap.put(304L, 491000L);
+        countMap.put(352L, 451000L);
+        countMap.put(391L, 351000L);
+        countMap.put(295L, 141000L);
+        countMap.put(327L, 240000L);
+
+        countMap.put(384L, 100000L);
+        countMap.put(288L, 90000L);
+        countMap.put(320L, 91000L);
+        countMap.put(263L, 51000L);
+
+        return countMap;
+    }
+}