KYLIN-2728 Introduce a new cuboid scheduler based on cuboid tree rather than 
static rules

Signed-off-by: Li Yang <liy...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/0eded0ad
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/0eded0ad
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/0eded0ad

Branch: refs/heads/ranger
Commit: 0eded0ad38a4a4b59f23e6cd5749fdb04b7c627f
Parents: 465c507
Author: Zhong <nju_y...@apache.org>
Authored: Thu Aug 17 20:40:35 2017 +0800
Committer: Li Yang <liy...@apache.org>
Committed: Sun Sep 10 07:47:20 2017 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/util/JsonUtil.java  |   4 +
 .../org/apache/kylin/cube/CubeInstance.java     | 136 ++++++++++++++++-
 .../kylin/cube/cuboid/CuboidModeEnum.java       |  48 ++++++
 .../cube/cuboid/TreeCuboidSchedulerTest.java    | 148 +++++++++++++++++++
 4 files changed, 333 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
----------------------------------------------------------------------
diff --git 
a/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java 
b/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
index 4f3086a..5a81463 100644
--- a/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
+++ b/core-common/src/main/java/org/apache/kylin/common/util/JsonUtil.java
@@ -72,6 +72,10 @@ public class JsonUtil {
         return mapper.readValue(src, valueType);
     }
 
+    public static <T> T readValue(String content, TypeReference<T> 
valueTypeRef) throws IOException {
+        return mapper.readValue(content, valueTypeRef);
+    }
+
     public static Map<String, String> readValueAsMap(String content) throws 
IOException {
         TypeReference<HashMap<String, String>> typeRef = new 
TypeReference<HashMap<String, String>>() {
         };

http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java 
b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
index 246cbf6..fa270fd 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeInstance.java
@@ -18,15 +18,24 @@
 
 package org.apache.kylin.cube;
 
+import static org.apache.kylin.cube.cuboid.CuboidModeEnum.CURRENT;
+import static org.apache.kylin.cube.cuboid.CuboidModeEnum.RECOMMEND;
+
 import java.io.IOException;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.KylinConfigExt;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.RootPersistentEntity;
+import org.apache.kylin.common.util.CompressionUtils;
+import org.apache.kylin.common.util.JsonUtil;
+import org.apache.kylin.cube.cuboid.CuboidModeEnum;
 import org.apache.kylin.cube.cuboid.CuboidScheduler;
+import org.apache.kylin.cube.cuboid.TreeCuboidScheduler;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.metadata.model.ColumnDesc;
 import org.apache.kylin.metadata.model.DataModelDesc;
@@ -51,6 +60,7 @@ import 
com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonManagedReference;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.type.TypeReference;
 import com.google.common.base.Objects;
 import com.google.common.collect.Lists;
 
@@ -96,6 +106,15 @@ public class CubeInstance extends RootPersistentEntity 
implements IRealization,
     @JsonProperty("create_time_utc")
     private long createTimeUTC;
 
+    // Compressed UTF-8 JSON of the Map<Long, Long> handled by getCuboids()/setCuboids().
+    // The property name is snake_case, like the other JSON properties of this class.
+    @JsonProperty("cuboid_bytes")
+    private byte[] cuboidBytes;
+
+    @JsonProperty("cuboid_bytes_recommend")
+    private byte[] cuboidBytesRecommend;
+
+    @JsonProperty("last_optimized")
+    private long lastOptimized;
+
     // cuboid scheduler lazy built
     transient private CuboidScheduler cuboidScheduler;
 
@@ -109,7 +128,14 @@ public class CubeInstance extends RootPersistentEntity 
implements IRealization,
 
         synchronized (this) {
             if (cuboidScheduler == null) {
-                cuboidScheduler = getDescriptor().getInitialCuboidScheduler();
+                Map<Long, Long> cuboidsWithRowCnt = getCuboids();
+                if (cuboidsWithRowCnt == null) {
+                    cuboidScheduler = 
getDescriptor().getInitialCuboidScheduler();
+                } else {
+                    cuboidScheduler = new TreeCuboidScheduler(getDescriptor(),
+                            Lists.newArrayList(cuboidsWithRowCnt.keySet()),
+                            new 
TreeCuboidScheduler.CuboidCostComparator(cuboidsWithRowCnt));
+                }
             }
         }
         return cuboidScheduler;
@@ -145,7 +171,8 @@ public class CubeInstance extends RootPersistentEntity 
implements IRealization,
     // in a temporary broken state, so that user can edit and fix it. Broken 
state is often due to
     // schema changes at source.
     public boolean allowBrokenDescriptor() {
-        return (getStatus() == RealizationStatusEnum.DISABLED || getStatus() 
== RealizationStatusEnum.DESCBROKEN) && segments.isEmpty();
+        return (getStatus() == RealizationStatusEnum.DISABLED || getStatus() 
== RealizationStatusEnum.DESCBROKEN)
+                && segments.isEmpty();
     }
 
     public String getResourcePath() {
@@ -305,6 +332,108 @@ public class CubeInstance extends RootPersistentEntity 
implements IRealization,
         this.createTimeUTC = createTimeUTC;
     }
 
+    public Set<Long> getCuboidsByMode(String cuboidModeName) {
+        return getCuboidsByMode(cuboidModeName == null ? null : 
CuboidModeEnum.getByModeName(cuboidModeName));
+    }
+
+    /**
+     * Returns the cuboid ids selected by the given mode.
+     * <ul>
+     * <li>{@code null} or CURRENT: all cuboid ids of the current cuboid scheduler</li>
+     * <li>RECOMMEND: the stored recommended cuboid ids</li>
+     * <li>RECOMMEND_EXISTING: recommended ids that are also current ids</li>
+     * <li>RECOMMEND_MISSING: recommended ids that are not current ids</li>
+     * <li>RECOMMEND_MISSING_WITH_BASE: as RECOMMEND_MISSING, plus the base cuboid id</li>
+     * </ul>
+     *
+     * @param cuboidMode the selection mode, may be null
+     * @return the selected ids; null when a RECOMMEND* mode is requested but no
+     *         recommendation is stored, or when the mode is not handled
+     */
+    public Set<Long> getCuboidsByMode(CuboidModeEnum cuboidMode) {
+        if (cuboidMode == null || cuboidMode == CURRENT) {
+            return getCuboidScheduler().getAllCuboidIds();
+        }
+        // getCuboidsRecommend() deserializes a fresh set on every call, so it is safe to
+        // modify it in place below.
+        Set<Long> cuboidsRecommend = getCuboidsRecommend();
+        if (cuboidsRecommend == null || cuboidMode == RECOMMEND) {
+            return cuboidsRecommend;
+        }
+        Set<Long> currentCuboids = getCuboidScheduler().getAllCuboidIds();
+        switch (cuboidMode) {
+        case RECOMMEND_EXISTING:
+            cuboidsRecommend.retainAll(currentCuboids);
+            return cuboidsRecommend;
+        case RECOMMEND_MISSING:
+            cuboidsRecommend.removeAll(currentCuboids);
+            return cuboidsRecommend;
+        case RECOMMEND_MISSING_WITH_BASE:
+            cuboidsRecommend.removeAll(currentCuboids);
+            // The base cuboid belongs in the returned set; the scheduler's set of current
+            // cuboids must not be modified here.
+            cuboidsRecommend.add(getCuboidScheduler().getBaseCuboidId());
+            return cuboidsRecommend;
+        default:
+            return null;
+        }
+    }
+
+    public Map<Long, Long> getCuboids() {
+        if (cuboidBytes == null)
+            return null;
+        byte[] uncompressed;
+        try {
+            uncompressed = CompressionUtils.decompress(cuboidBytes);
+            String str = new String(uncompressed, "UTF-8");
+            TypeReference<Map<Long, Long>> typeRef = new 
TypeReference<Map<Long, Long>>() {
+            };
+            Map<Long, Long> cuboids = JsonUtil.readValue(str, typeRef);
+            return cuboids.isEmpty() ? null : cuboids;
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Stores the given map as compressed UTF-8 JSON in {@code cuboidBytes}.
+     * A null argument leaves the stored value untouched; an empty map clears it.
+     *
+     * @throws RuntimeException wrapping an IOException from serialization or compression
+     */
+    public void setCuboids(Map<Long, Long> cuboids) {
+        if (cuboids == null) {
+            return;
+        }
+        if (cuboids.isEmpty()) {
+            cuboidBytes = null;
+            return;
+        }
+        try {
+            byte[] jsonBytes = JsonUtil.writeValueAsString(cuboids).getBytes("UTF-8");
+            cuboidBytes = CompressionUtils.compress(jsonBytes);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public Set<Long> getCuboidsRecommend() {
+        if (cuboidBytesRecommend == null)
+            return null;
+        byte[] uncompressed;
+        try {
+            uncompressed = CompressionUtils.decompress(cuboidBytesRecommend);
+            String str = new String(uncompressed, "UTF-8");
+            TypeReference<Set<Long>> typeRef = new TypeReference<Set<Long>>() {
+            };
+            Set<Long> cuboids = JsonUtil.readValue(str, typeRef);
+            return cuboids.isEmpty() ? null : cuboids;
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Stores the given ids as compressed UTF-8 JSON in {@code cuboidBytesRecommend}.
+     * A null argument leaves the stored value untouched; an empty set clears it.
+     *
+     * @throws RuntimeException wrapping an IOException from serialization or compression
+     */
+    public void setCuboidsRecommend(HashSet<Long> cuboids) {
+        if (cuboids == null) {
+            return;
+        }
+        if (cuboids.isEmpty()) {
+            cuboidBytesRecommend = null;
+            return;
+        }
+        try {
+            byte[] jsonBytes = JsonUtil.writeValueAsString(cuboids).getBytes("UTF-8");
+            cuboidBytesRecommend = CompressionUtils.compress(jsonBytes);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Get cuboid level count except base cuboid
+     * @return
+     */
+    public int getBuildLevel() {
+        return getCuboidScheduler().getCuboidsByLayer().size() - 1;
+    }
+
     @Override
     public CapabilityResult isCapable(SQLDigest digest) {
         CapabilityResult result = CubeCapabilityChecker.check(this, digest);
@@ -379,7 +508,8 @@ public class CubeInstance extends RootPersistentEntity 
implements IRealization,
         if 
(!this.getDescriptor().getModel().getPartitionDesc().isPartitioned())
             return false;
 
-        return this.getDescriptor().getAutoMergeTimeRanges() != null && 
this.getDescriptor().getAutoMergeTimeRanges().length > 0;
+        return this.getDescriptor().getAutoMergeTimeRanges() != null
+                && this.getDescriptor().getAutoMergeTimeRanges().length > 0;
     }
 
     public SegmentRange autoMergeCubeSegments() throws IOException {

http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java
new file mode 100644
index 0000000..f55c9db
--- /dev/null
+++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/CuboidModeEnum.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid;
+
+import com.google.common.base.Strings;
+
+public enum CuboidModeEnum {
+    CURRENT("CURRENT"), RECOMMEND("RECOMMEND"), 
RECOMMEND_EXISTING("RECOMMEND_EXISTING"), RECOMMEND_MISSING(
+            "RECOMMEND_MISSING"), 
RECOMMEND_MISSING_WITH_BASE("RECOMMEND_MISSING_WITH_BASE");
+
+    private final String modeName;
+
+    CuboidModeEnum(String modeName) {
+        this.modeName = modeName;
+    }
+
+    public String toString() {
+        return modeName;
+    }
+
+    public static CuboidModeEnum getByModeName(String modeName) {
+        if (Strings.isNullOrEmpty(modeName)) {
+            return null;
+        }
+        for (CuboidModeEnum mode : CuboidModeEnum.values()) {
+            if (mode.modeName.equals(modeName.toUpperCase())) {
+                return mode;
+            }
+        }
+        return null;
+    }
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/0eded0ad/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java
 
b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java
new file mode 100644
index 0000000..41fa807
--- /dev/null
+++ 
b/core-cube/src/test/java/org/apache/kylin/cube/cuboid/TreeCuboidSchedulerTest.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube.cuboid;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.kylin.cube.cuboid.TreeCuboidScheduler.CuboidTree;
+import org.junit.Test;
+
+import com.google.common.collect.Maps;
+
+public class TreeCuboidSchedulerTest {
+
+    @Test
+    public void testCreateCuboidTree() {
+        long basicCuboid = getBaseCuboid(10);
+        List<Long> cuboids = genRandomCuboids(basicCuboid, 200);
+        CuboidTree cuboidTree = CuboidTree.createFromCuboids(cuboids);
+        PrintWriter out = new PrintWriter(System.out);
+        cuboidTree.print(out);
+        out.flush();
+    }
+
+    @Test
+    public void testSpanningChild() {
+        long basicCuboid = getBaseCuboid(10);
+        List<Long> cuboids = genRandomCuboids(basicCuboid, 50);
+        long testCuboid = cuboids.get(10);
+        System.out.println(cuboids);
+        CuboidTree cuboidTree = CuboidTree.createFromCuboids(cuboids);
+        PrintWriter out = new PrintWriter(System.out);
+        cuboidTree.print(out);
+        out.flush();
+
+        List<Long> spanningChildren = cuboidTree.getSpanningCuboid(testCuboid);
+        System.out.println(testCuboid + ":" + spanningChildren);
+    }
+
+    @Test
+    public void testFindBestMatchCuboid() {
+        CuboidTree cuboidTree = createCuboidTree1();
+        PrintWriter out = new PrintWriter(System.out);
+        cuboidTree.print(out);
+        out.flush();
+
+        assertEquals(503L, cuboidTree.findBestMatchCuboid(503L));
+
+        long bestMatch1 = 
cuboidTree.findBestMatchCuboid(Long.parseLong("100000000", 2));
+        assertEquals(263, bestMatch1);
+
+        long bestMatch2 = 
cuboidTree.findBestMatchCuboid(Long.parseLong("100010000", 2));
+        assertEquals(304, bestMatch2);
+    }
+
+    /**
+     * Builds a list that starts with {@code basicCuboidId} followed by {@code count} random
+     * ids, each masked with {@code basicCuboidId} so only bits of the base cuboid are set.
+     */
+    private List<Long> genRandomCuboids(long basicCuboidId, int count) {
+        Random rng = new Random();
+        List<Long> cuboidIds = new ArrayList<>();
+        cuboidIds.add(basicCuboidId);
+        int generated = 0;
+        while (generated < count) {
+            cuboidIds.add(rng.nextLong() & basicCuboidId);
+            generated++;
+        }
+        return cuboidIds;
+    }
+
+    /**
+     * Returns a cuboid id with the lowest {@code dimensionCnt} bits set.
+     *
+     * @param dimensionCnt number of dimensions, at most 64
+     * @return the bit mask of the base cuboid
+     * @throws IllegalArgumentException if {@code dimensionCnt} exceeds 64
+     */
+    private long getBaseCuboid(int dimensionCnt) {
+        if (dimensionCnt > 64) {
+            throw new IllegalArgumentException("the dimension count cannot exceed 64");
+        }
+        long result = 0;
+        for (int i = 0; i < dimensionCnt; i++) {
+            // Shift a long: an int shift only uses the low 5 bits of the distance, so it
+            // would produce wrong masks from bit 31 upwards.
+            result |= (1L << i);
+        }
+        return result;
+    }
+
+    private CuboidTree createCuboidTree1() {
+        List<Long> cuboids = Arrays.asList(504L, 511L, 447L, 383L, 503L, 440L, 
496L, 376L, 439L, 487L, 375L, 319L, 432L,
+                480L, 368L, 312L, 423L, 455L, 311L, 359L, 416L, 448L, 304L, 
352L, 391L, 295L, 327L, 384L, 288L, 320L,
+                263L);
+        return CuboidTree.createFromCuboids(cuboids,
+                new 
TreeCuboidScheduler.CuboidCostComparator(simulateStatistics()));
+    }
+
+    private Map<Long, Long> simulateStatistics() {
+        Map<Long, Long> countMap = Maps.newHashMap();
+        countMap.put(511L, 1000000L);
+
+        countMap.put(504L, 900000L);
+        countMap.put(447L, 990000L);
+        countMap.put(383L, 991000L);
+        countMap.put(503L, 980000L);
+
+        countMap.put(440L, 800000L);
+        countMap.put(496L, 890000L);
+        countMap.put(376L, 891000L);
+        countMap.put(439L, 751000L);
+        countMap.put(487L, 751000L);
+        countMap.put(375L, 741000L);
+        countMap.put(319L, 740000L);
+
+        countMap.put(432L, 600000L);
+        countMap.put(480L, 690000L);
+        countMap.put(368L, 691000L);
+        countMap.put(312L, 651000L);
+        countMap.put(423L, 651000L);
+        countMap.put(455L, 541000L);
+        countMap.put(311L, 540000L);
+        countMap.put(359L, 530000L);
+
+        countMap.put(416L, 400000L);
+        countMap.put(448L, 490000L);
+        countMap.put(304L, 491000L);
+        countMap.put(352L, 451000L);
+        countMap.put(391L, 351000L);
+        countMap.put(295L, 141000L);
+        countMap.put(327L, 240000L);
+
+        countMap.put(384L, 100000L);
+        countMap.put(288L, 90000L);
+        countMap.put(320L, 91000L);
+        countMap.put(263L, 51000L);
+
+        return countMap;
+    }
+}

Reply via email to