KYLIN-2249 Bug fix: cube build fails when using inmem but succeeds with layer

Signed-off-by: Yang Li <liy...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/4d52147e
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/4d52147e
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/4d52147e

Branch: refs/heads/v2.0.0-release-hbase0.98
Commit: 4d52147e81e2581e39256e56680fd06ca1d4963c
Parents: 6c376b2
Author: xiefan46 <958034...@qq.com>
Authored: Tue Apr 25 18:10:09 2017 +0800
Committer: Yang Li <liy...@apache.org>
Committed: Tue Apr 25 20:17:58 2017 +0800

----------------------------------------------------------------------
 .../kylin/dict/TrieDictionaryBuilder.java       | 33 +++++++++++++-------
 .../apache/kylin/dict/TrieDictionaryForest.java | 14 ++++-----
 .../kylin/dict/TrieDictionaryForestBuilder.java |  2 +-
 .../kylin/dict/TrieDictionaryForestTest.java    | 11 +++++++
 .../apache/kylin/dict/TrieDictionaryTest.java   | 10 ++++++
 .../kylin/dimension/DictionaryDimEnc.java       |  2 +-
 6 files changed, 51 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java
index 102c49e..1750ac1 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java
@@ -34,11 +34,11 @@ import org.apache.kylin.common.util.BytesUtil;
  * Builds a dictionary using Trie structure. All values are taken in byte[] form
  * and organized in a Trie with ordering. Then numeric IDs are assigned in
  * sequence.
- * 
+ *
  * @author yangli9
  */
 public class TrieDictionaryBuilder<T> {
-    
+
     private static final int _2GB = 2000000000;
 
     public static class Node {
@@ -76,6 +76,8 @@ public class TrieDictionaryBuilder<T> {
     private Node root;
     protected BytesConverter<T> bytesConverter;
 
+    private boolean hasValue = false;
+
     public TrieDictionaryBuilder(BytesConverter<T> bytesConverter) {
         this.root = new Node(new byte[0], false);
         this.bytesConverter = bytesConverter;
@@ -91,6 +93,7 @@ public class TrieDictionaryBuilder<T> {
     }
 
     private void addValueR(Node node, byte[] value, int start) {
+        hasValue = true;
         // match the value part of current node
         int i = 0, j = start;
         int n = node.part.length, nn = value.length;
@@ -179,10 +182,8 @@ public class TrieDictionaryBuilder<T> {
 
     public static class Stats {
         public int nValues; // number of values in total
-        public int nValueBytesPlain; // number of bytes for all values
-                                     // uncompressed
-        public int nValueBytesCompressed; // number of values bytes in Trie
-                                          // (compressed)
+        public int nValueBytesPlain; // number of bytes for all values uncompressed
+        public int nValueBytesCompressed; // number of values bytes in Trie (compressed)
         public int maxValueLength; // size of longest value in bytes
 
         // the trie is multi-byte-per-node
@@ -234,7 +235,13 @@ public class TrieDictionaryBuilder<T> {
         }
     }
 
-    /** out print some statistics of the trie and the dictionary built from it */
+    public boolean isHasValue() {
+        return hasValue;
+    }
+
+    /**
+     * out print some statistics of the trie and the dictionary built from it
+     */
     public Stats stats() {
         // calculate nEndValueBeneath
         traversePostOrder(new Visitor() {
@@ -313,7 +320,9 @@ public class TrieDictionaryBuilder<T> {
         return s;
     }
 
-    /** out print trie for debug */
+    /**
+     * out print trie for debug
+     */
     public void print() {
         print(System.out);
     }
@@ -396,11 +405,11 @@ public class TrieDictionaryBuilder<T> {
     /**
      * Flatten the trie into a byte array for a minimized memory footprint.
      * Lookup remains fast. Cost is inflexibility to modify (becomes immutable).
-     * 
+     * <p>
      * Flattened node structure is HEAD + NODEs, for each node:
      * - o byte, offset to child node, o = stats.mbpn_sizeChildOffset
-     *    - 1 bit, isLastChild flag, the 1st MSB of o
-     *    - 1 bit, isEndOfValue flag, the 2nd MSB of o
+     *   - 1 bit, isLastChild flag, the 1st MSB of o
+     *   - 1 bit, isEndOfValue flag, the 2nd MSB of o
      * - c byte, number of values beneath, c = stats.mbpn_sizeNoValueBeneath
      * - 1 byte, number of value bytes
      * - n byte, value bytes
@@ -417,7 +426,7 @@ public class TrieDictionaryBuilder<T> {
         Stats stats = stats();
         int sizeNoValuesBeneath = stats.mbpn_sizeNoValueBeneath;
         int sizeChildOffset = stats.mbpn_sizeChildOffset;
-        
+
         if (stats.mbpn_footprint <= 0) // must never happen, but let us be cautious
             throw new IllegalStateException("Too big dictionary, dictionary cannot be bigger than 2GB");
         if (stats.mbpn_footprint > _2GB)

http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
index 1023892..09d5bc2 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java
@@ -63,7 +63,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
     }
 
     public TrieDictionaryForest(ArrayList<TrieDictionary<T>> trees, ArrayList<ByteArray> valueDivide, //
-            ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) {
+                                ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) {
         init(trees, valueDivide, accuOffset, bytesConverter, baseId);
     }
 
@@ -342,7 +342,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         initSizeOfValue();
     }
 
-    private void initMaxValueForEachTrie(){
+    private void initMaxValueForEachTrie() {
         //init max value
         this.maxValue = new ArrayList<>();
         if (this.trees == null || trees.isEmpty()) {
@@ -356,7 +356,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         }
     }
 
-    private void initMaxId(){
+    private void initMaxId() {
         if (trees.isEmpty()) {
             this.maxId = baseId - 1;
             return;
@@ -365,7 +365,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         this.maxId = accuOffset.get(index) + trees.get(index).getMaxId() + baseId;
     }
 
-    private void initMinId(){
+    private void initMinId() {
         if (trees.isEmpty()) {
             this.minId = baseId;
             return;
@@ -373,8 +373,8 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         this.minId = trees.get(0).getMinId() + baseId;
     }
 
-    private void initSizeOfId(){
-        if (trees.isEmpty()){
+    private void initSizeOfId() {
+        if (trees.isEmpty()) {
             this.sizeOfId = 1;
             return;
         }
@@ -383,7 +383,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> {
         this.sizeOfId = BytesUtil.sizeForValue(baseId + maxOffset + lastTree.getMaxId() + 1L);
     }
 
-    private void initSizeOfValue(){
+    private void initSizeOfValue() {
         int maxValue = 0;
         for (TrieDictionary<T> tree : trees)
             maxValue = Math.max(maxValue, tree.getSizeOfValue());

http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
index 69da472..0e5e63e 100755
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java
@@ -105,7 +105,7 @@ public class TrieDictionaryForestBuilder<T> {
     }
 
     public TrieDictionaryForest<T> build() {
-        if (curTreeSize != 0) { //last tree
+        if (trieBuilder.isHasValue()) { //last tree
             TrieDictionary<T> tree = trieBuilder.build(0);
             addTree(tree);
             reset();

http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index dd1f951..82380b3 100755
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -133,6 +133,17 @@ public class TrieDictionaryForestTest {
     }
 
     @Test
+    public void testAllNullValue() {
+        ArrayList<String> strs = new ArrayList<String>();
+        strs.add("");
+        int maxTreeSize = 10;
+        TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, 0, maxTreeSize);
+        TrieDictionaryForest<String> dict = builder.build();
+        assertEquals(1, dict.getSize());
+        assertEquals(0, dict.getIdFromValue(""));
+    }
+
+    @Test
     public void testBigDataSet() {
         //h=generate data
         ArrayList<String> strs = new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
index 22a93a0..13c83ac 100644
--- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
@@ -221,6 +221,16 @@ public class TrieDictionaryTest {
         testStringDictionary(str, null);
     }
 
+    @Test
+    public void testAllNullValue() {
+        ArrayList<String> strs = new ArrayList<String>();
+        strs.add("");
+        TrieDictionaryBuilder<String> builder = newDictBuilder(strs);
+        TrieDictionary<String> dict = builder.build(0);
+        assertEquals(1, dict.getSize());
+        assertEquals(0, dict.getIdFromValue(""));
+    }
+
     private static void benchmarkStringDictionary(Iterable<String> str) throws IOException {
         TrieDictionaryBuilder<String> b = newDictBuilder(str);
         b.stats().print();

http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java
index 46f9117..b022d84 100644
--- a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java
+++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java
@@ -30,7 +30,7 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class DictionaryDimEnc extends DimensionEncoding implements Serializable{
+public class DictionaryDimEnc extends DimensionEncoding implements Serializable {
     private static final long serialVersionUID = 1L;
 
     private static final Logger logger = LoggerFactory.getLogger(DictionaryDimEnc.class);

Reply via email to