Repository: kylin
Updated Branches:
  refs/heads/master 00ba74327 -> 0a0c5547d


KYLIN-1851 Refactor NumberDictionaryForest


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/0a0c5547
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/0a0c5547
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/0a0c5547

Branch: refs/heads/master
Commit: 0a0c5547d4f7d87696822b322a9a78112ec26c40
Parents: 00ba743
Author: Li Yang <[email protected]>
Authored: Thu Nov 17 11:34:43 2016 +0800
Committer: Li Yang <[email protected]>
Committed: Thu Nov 17 11:34:54 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/dict/DictionaryGenerator.java  |   2 +-
 .../kylin/dict/NumberDictionaryBuilder.java     |   1 -
 .../kylin/dict/NumberDictionaryForest.java      | 284 -------------------
 .../dict/NumberDictionaryForestBuilder.java     |  68 +++--
 .../kylin/dict/TrieDictionaryForestTest.java    | 260 ++++++++---------
 .../mr/steps/NumberDictionaryForestTest.java    |  19 +-
 .../mr/steps/SelfDefineSortableKeyTest.java     |  22 +-
 7 files changed, 180 insertions(+), 476 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/0a0c5547/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git 
a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java 
b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 8eafe5f..ad07423 100644
--- 
a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ 
b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -156,7 +156,7 @@ public class DictionaryGenerator {
     private static class NumberDictBuilder implements IDictionaryBuilder {
         @Override
         public Dictionary<String> build(DictionaryInfo dictInfo, 
IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, 
ArrayList<String> returnSamples) throws IOException {
-            NumberDictionaryForestBuilder builder = new 
NumberDictionaryForestBuilder(new StringBytesConverter(), baseId);
+            NumberDictionaryForestBuilder builder = new 
NumberDictionaryForestBuilder(baseId);
             byte[] value;
             while (valueEnumerator.moveNext()) {
                 value = valueEnumerator.current();

http://git-wip-us.apache.org/repos/asf/kylin/blob/0a0c5547/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java
----------------------------------------------------------------------
diff --git 
a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java
 
b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java
index 6d7d0db..27d81ba 100644
--- 
a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java
+++ 
b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryBuilder.java
@@ -22,7 +22,6 @@ import org.apache.kylin.common.util.Bytes;
 
 /**
  * @author yangli9
- * 
  */
 public class NumberDictionaryBuilder<T> extends TrieDictionaryBuilder<T> {
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/0a0c5547/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
----------------------------------------------------------------------
diff --git 
a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
 
b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
deleted file mode 100644
index fdf1e68..0000000
--- 
a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForest.java
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.dict;
-
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.common.util.Dictionary;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.PrintStream;
-
-/**
- * Created by xiefan on 16-11-1.
- * <p>
- * notice:number dictionary forest currently could not handle
- * very big or very small double and float value such as 4.9E-324
- */
-public class NumberDictionaryForest<T> extends Dictionary<T> {
-
-    public static final int MAX_DIGITS_BEFORE_DECIMAL_POINT = 19;
-
-    // encode a number into an order preserving byte sequence
-    // for positives -- padding '0'
-    // for negatives -- '-' sign, padding '9', invert digits, and terminate by 
';'
-    static class NumberBytesCodec {
-        int maxDigitsBeforeDecimalPoint;
-        byte[] buf;
-        int bufOffset;
-        int bufLen;
-
-        NumberBytesCodec(int maxDigitsBeforeDecimalPoint) {
-            this.maxDigitsBeforeDecimalPoint = maxDigitsBeforeDecimalPoint;
-            this.buf = new byte[maxDigitsBeforeDecimalPoint * 3];
-            this.bufOffset = 0;
-            this.bufLen = 0;
-        }
-
-        void encodeNumber(byte[] value, int offset, int len) {
-            if (len == 0) {
-                bufOffset = 0;
-                bufLen = 0;
-                return;
-            }
-
-            if (len > buf.length) {
-                throw new IllegalArgumentException("Too many digits for 
NumberDictionary: " + Bytes.toString(value, offset, len) + ". Internal buffer 
is only " + buf.length + " bytes");
-            }
-
-            boolean negative = value[offset] == '-';
-
-            // terminate negative ';'
-            int start = buf.length - len;
-            int end = buf.length;
-            if (negative) {
-                start--;
-                end--;
-                buf[end] = ';';
-            }
-
-            // copy & find decimal point
-            int decimalPoint = end;
-            for (int i = start, j = offset; i < end; i++, j++) {
-                buf[i] = value[j];
-                if (buf[i] == '.' && i < decimalPoint) {
-                    decimalPoint = i;
-                }
-            }
-            // remove '-' sign
-            if (negative) {
-                start++;
-            }
-
-            // prepend '0'
-            int nZeroPadding = maxDigitsBeforeDecimalPoint - (decimalPoint - 
start);
-            if (nZeroPadding < 0 || nZeroPadding + 1 > start)
-                throw new IllegalArgumentException("Too many digits for 
NumberDictionary: " + Bytes.toString(value, offset, len) + ". Expect " + 
maxDigitsBeforeDecimalPoint + " digits before decimal point at max.");
-            for (int i = 0; i < nZeroPadding; i++) {
-                buf[--start] = '0';
-            }
-
-            // consider negative
-            if (negative) {
-                buf[--start] = '-';
-                for (int i = start + 1; i < buf.length; i++) {
-                    int c = buf[i];
-                    if (c >= '0' && c <= '9') {
-                        buf[i] = (byte) ('9' - (c - '0'));
-                    }
-                }
-            } else {
-                buf[--start] = '0';
-            }
-
-            bufOffset = start;
-            bufLen = buf.length - start;
-        }
-
-        int decodeNumber(byte[] returnValue, int offset) {
-            if (bufLen == 0) {
-                return 0;
-            }
-
-            int in = bufOffset;
-            int end = bufOffset + bufLen;
-            int out = offset;
-
-            // sign
-            boolean negative = buf[in] == '-';
-            if (negative) {
-                returnValue[out++] = '-';
-                in++;
-                end--;
-            }
-
-            // remove padding
-            byte padding = (byte) (negative ? '9' : '0');
-            for (; in < end; in++) {
-                if (buf[in] != padding)
-                    break;
-            }
-
-            // all paddings before '.', special case for '0'
-            if (in == end || !(buf[in] >= '0' && buf[in] <= '9')) {
-                returnValue[out++] = '0';
-            }
-
-            // copy the rest
-            if (negative) {
-                for (; in < end; in++, out++) {
-                    int c = buf[in];
-                    if (c >= '0' && c <= '9') {
-                        c = '9' - (c - '0');
-                    }
-                    returnValue[out] = (byte) c;
-                }
-            } else {
-                System.arraycopy(buf, in, returnValue, out, end - in);
-                out += end - in;
-            }
-
-            return out - offset;
-        }
-    }
-
-    static ThreadLocal<NumberBytesCodec> localCodec =
-            new ThreadLocal<NumberBytesCodec>();
-
-    // 
============================================================================
-
-    private TrieDictionaryForest<T> dict;
-
-    private BytesConverter<T> converter;
-
-    public NumberDictionaryForest() {
-    }
-
-    public NumberDictionaryForest(TrieDictionaryForest<T> dict, 
BytesConverter<T> converter) {
-        this.dict = dict;
-        this.converter = converter;
-    }
-
-    protected NumberBytesCodec getCodec() {
-        NumberBytesCodec codec = localCodec.get();
-        if (codec == null) {
-            codec = new NumberBytesCodec(MAX_DIGITS_BEFORE_DECIMAL_POINT);
-            localCodec.set(codec);
-        }
-        return codec;
-    }
-
-    @Override
-    public int getMinId() {
-        return dict.getMinId();
-    }
-
-    @Override
-    public int getMaxId() {
-        return dict.getMaxId();
-    }
-
-    @Override
-    public int getSizeOfId() {
-        return dict.getSizeOfId();
-    }
-
-    @Override
-    public int getSizeOfValue() {
-        return dict.getSizeOfValue();
-    }
-
-    @Override
-    public boolean contains(Dictionary<?> another) {
-        return dict.contains(another);
-    }
-
-    @Override
-    protected int getIdFromValueImpl(T value, int roundingFlag) {
-        if (value == null) return -1;
-        byte[] data = converter.convertToBytes(value);
-        return getIdFromValueBytesImpl(data, 0, data.length, roundingFlag);
-    }
-
-    @Override
-    protected int getIdFromValueBytesImpl(byte[] value, int offset, int len, 
int roundingFlag) {
-        NumberBytesCodec codec = getCodec();
-        codec.encodeNumber(value, offset, len);
-        return this.dict.getIdFromValueBytesImpl(codec.buf, codec.bufOffset, 
codec.bufLen, roundingFlag);
-    }
-
-    @Override
-    protected T getValueFromIdImpl(int id) {
-        byte[] data = getValueBytesFromIdImpl(id);
-        if (data == null) return null;
-        else return converter.convertFromBytes(data, 0, data.length);
-    }
-
-    @Override
-    protected byte[] getValueBytesFromIdImpl(int id) {
-        NumberBytesCodec codec = getCodec();
-        codec.bufOffset = 0;
-        byte[] buf = new byte[dict.getSizeOfValue()];
-        codec.bufLen = getValueBytesFromIdImpl(id, buf, 0);
-
-        if (codec.bufLen == buf.length) {
-            return buf;
-        } else {
-            byte[] result = new byte[codec.bufLen];
-            System.arraycopy(buf, 0, result, 0, codec.bufLen);
-            return result;
-        }
-    }
-
-    @Override
-    protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int 
offset) {
-        NumberBytesCodec codec = getCodec();
-        codec.bufOffset = 0;
-        codec.bufLen = this.dict.getValueBytesFromIdImpl(id, codec.buf, 0);
-        return codec.decodeNumber(returnValue, offset);
-    }
-
-    @Override
-    public void dump(PrintStream out) {
-        dict.dump(out);
-    }
-
-    @Override
-    public void write(DataOutput out) throws IOException {
-        dict.write(out);
-    }
-
-    @Override
-    public void readFields(DataInput in) throws IOException {
-        this.dict = new TrieDictionaryForest<>();
-        dict.readFields(in);
-        this.converter = this.dict.getBytesConvert();
-    }
-
-    public BytesConverter<T> getConverter() {
-        return converter;
-    }
-
-    public int getTreeSize(){
-        return this.dict.getTrees().size();
-    }
-
-
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/0a0c5547/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
----------------------------------------------------------------------
diff --git 
a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
 
b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
index 519d4c3..4bd6c0f 100644
--- 
a/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
+++ 
b/core-dictionary/src/main/java/org/apache/kylin/dict/NumberDictionaryForestBuilder.java
@@ -19,49 +19,57 @@
 package org.apache.kylin.dict;
 
 import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.dict.NumberDictionary.NumberBytesCodec;
 
 /**
  * Created by xiefan on 16-11-2.
  */
-public class NumberDictionaryForestBuilder<T> {
+public class NumberDictionaryForestBuilder extends 
TrieDictionaryForestBuilder<String> {
 
-    private TrieDictionaryForestBuilder<T> trieBuilder;
+    public static class Number2BytesConverter implements 
BytesConverter<String> {
 
-    private BytesConverter<T> bytesConverter;
+        static final int MAX_DIGITS_BEFORE_DECIMAL_POINT = 
NumberDictionary.MAX_DIGITS_BEFORE_DECIMAL_POINT;
+        static final ThreadLocal<NumberBytesCodec> LOCAL = new 
ThreadLocal<NumberBytesCodec>();
 
-    private NumberDictionaryForest.NumberBytesCodec codec = new 
NumberDictionaryForest.NumberBytesCodec(NumberDictionaryForest.MAX_DIGITS_BEFORE_DECIMAL_POINT);
+        static NumberBytesCodec getCodec() {
+            NumberBytesCodec codec = LOCAL.get();
+            if (codec == null) {
+                codec = new NumberBytesCodec(MAX_DIGITS_BEFORE_DECIMAL_POINT);
+                LOCAL.set(codec);
+            }
+            return codec;
+        }
+        
+        @Override
+        public byte[] convertToBytes(String v) {
+            NumberBytesCodec codec = getCodec();
+            byte[] num = Bytes.toBytes(v);
+            codec.encodeNumber(num, 0, num.length);
+            return Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen);
+        }
 
-    public NumberDictionaryForestBuilder(BytesConverter<T> bytesConverter) {
-        this(bytesConverter, 0);
+        @Override
+        public String convertFromBytes(byte[] b, int offset, int length) {
+            NumberBytesCodec codec = getCodec();
+            byte[] backup = codec.buf;
+            codec.buf = b;
+            codec.bufOffset = offset;
+            codec.bufLen = length;
+            int len = codec.decodeNumber(backup, 0);
+            codec.buf = backup;
+            return Bytes.toString(backup, 0, len);
+        }
     }
 
-    public NumberDictionaryForestBuilder(BytesConverter<T> bytesConverter, int 
baseId) {
-        this.trieBuilder = new TrieDictionaryForestBuilder<T>(bytesConverter, 
baseId);
-        this.bytesConverter = bytesConverter;
+    public NumberDictionaryForestBuilder() {
+        super(new Number2BytesConverter());
     }
 
-    public NumberDictionaryForestBuilder(BytesConverter<T> bytesConverter, int 
baseId, int maxTrieSizeMB) {
-        this.trieBuilder = new TrieDictionaryForestBuilder<T>(bytesConverter, 
baseId, maxTrieSizeMB);
-        this.bytesConverter = bytesConverter;
+    public NumberDictionaryForestBuilder(int baseId) {
+        super(new Number2BytesConverter(), baseId); // forward the caller's baseId (was hard-coded 0)
     }
 
-    public void addValue(T value) {
-        addValue(bytesConverter.convertToBytes(value));
-    }
-
-    public void addValue(byte[] value) {
-        codec.encodeNumber(value, 0, value.length);
-        byte[] copy = Bytes.copy(codec.buf, codec.bufOffset, codec.bufLen);
-        this.trieBuilder.addValue(copy);
-    }
-
-    //TODO:ensure ordered
-    public NumberDictionaryForest<T> build() {
-        TrieDictionaryForest<T> forest = trieBuilder.build();
-        return new NumberDictionaryForest<T>(forest, bytesConverter);
-    }
-
-    public void setMaxTrieSize(int size) {
-        this.trieBuilder.setMaxTrieTreeSize(size);
+    public NumberDictionaryForestBuilder(int baseId, int maxTrieSizeMB) {
+        super(new Number2BytesConverter(), baseId, maxTrieSizeMB); // forward the caller's baseId (was hard-coded 0)
     }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/0a0c5547/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
----------------------------------------------------------------------
diff --git 
a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
 
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index 07511d1..c4c0fd8 100755
--- 
a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ 
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -16,29 +16,41 @@
  * limitations under the License.
 */
 
-
 package org.apache.kylin.dict;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Random;
+import java.util.TreeSet;
 
-import org.apache.kylin.common.util.Array;
-import org.apache.kylin.common.util.MemoryBudgetController;
 import org.junit.Ignore;
 import org.junit.Test;
 
-import java.io.*;
-import java.util.*;
-
-import static org.junit.Assert.*;
-import static org.junit.Assert.assertEquals;
-
 /**
  * Created by xiefan on 16-10-26.
  */
 
 public class TrieDictionaryForestTest {
 
-
-
     @Test
     public void testBasicFound() {
         ArrayList<String> strs = new ArrayList<String>();
@@ -62,7 +74,7 @@ public class TrieDictionaryForestTest {
         System.out.println("test ok");
     }
 
-    @Test  //one string one tree
+    @Test //one string one tree
     public void testMultiTree() {
         ArrayList<String> strs = new ArrayList<String>();
         strs.add("part");
@@ -71,7 +83,7 @@ public class TrieDictionaryForestTest {
         strs.add("party");
         strs.add("parties");
         strs.add("paint");
-        strs.add("一二三");  //Chinese test
+        strs.add("一二三"); //Chinese test
         strs.add("四五六");
         strs.add("");
         Collections.sort(strs, new ByteComparator<String>(new 
StringBytesConverter()));
@@ -91,7 +103,7 @@ public class TrieDictionaryForestTest {
     }
 
     @Test
-    public void testNullValue(){
+    public void testNullValue() {
         //encounter null value when building dictionary
         ArrayList<String> strs = new ArrayList<String>();
         strs.add(null);
@@ -102,15 +114,14 @@ public class TrieDictionaryForestTest {
         TrieDictionaryForest<String> dict = builder.build();
         dict.dump(System.out);
         //null value query
-        int id = dict.getIdFromValue(null,0);
+        int id = dict.getIdFromValue(null, 0);
         System.out.println(id);
-        id = dict.getIdFromValue(null,1);
+        id = dict.getIdFromValue(null, 1);
         System.out.println(id);
-        id = dict.getIdFromValue(null,-1);
+        id = dict.getIdFromValue(null, -1);
         System.out.println(id);
     }
 
-
     @Test
     public void testBigDataSet() {
         //h=generate data
@@ -209,7 +220,6 @@ public class TrieDictionaryForestTest {
         testStringDictionary(str, notFound);
     }
 
-
     @Test
     public void dictionaryContainTest() {
         ArrayList<String> str = new ArrayList<String>();
@@ -263,103 +273,102 @@ public class TrieDictionaryForestTest {
         }
     }
 
-
     @Test
-    public void roundingFlagTest(){
+    public void roundingFlagTest() {
         ArrayList<String> testData = new ArrayList<>();
         testData.add("b");
         testData.add("bdd");
         testData.add("ccc");
         int baseId = 10;
-        TrieDictionaryForestBuilder<String> b = 
TrieDictionaryForestTest.newDictBuilder(testData,baseId, 0);
+        TrieDictionaryForestBuilder<String> b = 
TrieDictionaryForestTest.newDictBuilder(testData, baseId, 0);
         TrieDictionaryForest<String> dict = b.build();
 
         //left
         String smallerStr = "a";
         int id;
-        try{
-            id = dict.getIdFromValue(smallerStr,0);
-            fail("should throw IllegalArgumentException,but id is:"+id);
-        }catch (IllegalArgumentException e){
+        try {
+            id = dict.getIdFromValue(smallerStr, 0);
+            fail("should throw IllegalArgumentException,but id is:" + id);
+        } catch (IllegalArgumentException e) {
             //correct
         }
-        try{
-            id = dict.getIdFromValue(smallerStr,-1);
-            fail("should throw IllegalArgumentException,but id is:"+id);
-        }catch (IllegalArgumentException e){
+        try {
+            id = dict.getIdFromValue(smallerStr, -1);
+            fail("should throw IllegalArgumentException,but id is:" + id);
+        } catch (IllegalArgumentException e) {
             //correct
         }
-        id = dict.getIdFromValue(smallerStr,1);
-        assertEquals(baseId,id);
+        id = dict.getIdFromValue(smallerStr, 1);
+        assertEquals(baseId, id);
 
         //middle
         String middleStr = "bd";
-        try{
-            id = dict.getIdFromValue(middleStr,0);
-            fail("should throw IllegalArgumentException,but id is:"+id);
-        }catch (IllegalArgumentException e){
+        try {
+            id = dict.getIdFromValue(middleStr, 0);
+            fail("should throw IllegalArgumentException,but id is:" + id);
+        } catch (IllegalArgumentException e) {
             //correct
         }
-        id = dict.getIdFromValue(middleStr,-1);
-        assertEquals(baseId,id);
-        id = dict.getIdFromValue(middleStr,1);
-        assertEquals(baseId+1,id);
+        id = dict.getIdFromValue(middleStr, -1);
+        assertEquals(baseId, id);
+        id = dict.getIdFromValue(middleStr, 1);
+        assertEquals(baseId + 1, id);
 
         //right
         String rightStr = "e";
-        try{
-            id = dict.getIdFromValue(rightStr,0);
-            fail("should throw IllegalArgumentException,but id is:"+id);
-        }catch (IllegalArgumentException e){
+        try {
+            id = dict.getIdFromValue(rightStr, 0);
+            fail("should throw IllegalArgumentException,but id is:" + id);
+        } catch (IllegalArgumentException e) {
             //correct
         }
-        id = dict.getIdFromValue(rightStr,-1);
-        assertEquals(baseId+2,id);
-        try{
-            id = dict.getIdFromValue(rightStr,1);
-            fail("should throw IllegalArgumentException,but id is:"+id);
-        }catch (IllegalArgumentException e){
+        id = dict.getIdFromValue(rightStr, -1);
+        assertEquals(baseId + 2, id);
+        try {
+            id = dict.getIdFromValue(rightStr, 1);
+            fail("should throw IllegalArgumentException,but id is:" + id);
+        } catch (IllegalArgumentException e) {
             //correct
         }
     }
 
     @Test
-    public void stringDictRoundFlagTest(){
+    public void stringDictRoundFlagTest() {
         TreeSet<String> set = new TreeSet<>(new ByteComparator<>(new 
StringBytesConverter()));
-        Iterator<String> it = new RandomStrings(10*10000).iterator();
+        Iterator<String> it = new RandomStrings(10 * 10000).iterator();
         int size = 0;
-        while(it.hasNext()){
+        while (it.hasNext()) {
             BytesConverter converter = new StringBytesConverter();
             String str = it.next();
             set.add(str);
             size += converter.convertToBytes(str).length;
         }
         int treeNum = 5;
-        TrieDictionaryForestBuilder<String> builder = 
newDictBuilder(set.iterator(),0,size / treeNum);
+        TrieDictionaryForestBuilder<String> builder = 
newDictBuilder(set.iterator(), 0, size / treeNum);
         TrieDictionaryForest<String> dict = builder.build();
         //dict.dump(System.out);
 
         //test roundingFlag > 0
-        Iterator<String> it2 = new RandomStrings(100*10000).iterator();
-        while(it2.hasNext()){
+        Iterator<String> it2 = new RandomStrings(100 * 10000).iterator();
+        while (it2.hasNext()) {
             String query = it2.next();
             //System.out.println("query:"+query);
             try {
                 int id = dict.getIdFromValue(query, 1);
-                assertEquals(set.ceiling(query),dict.getValueFromId(id));
-            }catch(IllegalArgumentException e){
+                assertEquals(set.ceiling(query), dict.getValueFromId(id));
+            } catch (IllegalArgumentException e) {
                 assertNull(set.ceiling(query));
             }
         }
 
         //test roundingFlag < 0
-        Iterator<String> it3 = new RandomStrings(100*10000).iterator();
-        while(it3.hasNext()){
+        Iterator<String> it3 = new RandomStrings(100 * 10000).iterator();
+        while (it3.hasNext()) {
             String query = it3.next();
             try {
                 int id = dict.getIdFromValue(query, -1);
-                assertEquals(set.floor(query),dict.getValueFromId(id));
-            }catch(IllegalArgumentException e){
+                assertEquals(set.floor(query), dict.getValueFromId(id));
+            } catch (IllegalArgumentException e) {
                 assertNull(set.floor(query));
             }
         }
@@ -367,15 +376,15 @@ public class TrieDictionaryForestTest {
     }
 
     @Test
-    public void longDictRoundingFlagTest(){
+    public void longDictRoundingFlagTest() {
         TreeSet<String> set = new TreeSet<>(new Comparator<String>() {
             @Override
             public int compare(String o1, String o2) {
-                try{
+                try {
                     Long l1 = Long.parseLong(o1);
                     Long l2 = Long.parseLong(o2);
                     return l1.compareTo(l2);
-                }catch(NumberFormatException e){
+                } catch (NumberFormatException e) {
                     e.printStackTrace();
                     return 0;
                 }
@@ -385,49 +394,45 @@ public class TrieDictionaryForestTest {
         int k = -48481;
         int size = 0;
         StringBytesConverter converter = new StringBytesConverter();
-        for(int i=0;i<num;i++)
-        {
-            String value = k+"";
+        for (int i = 0; i < num; i++) {
+            String value = k + "";
             set.add(value);
             k += 1;
             String basic = "-9999999999999952517";
             size += converter.convertToBytes(basic).length;
         }
-        System.out.println("tree num:"+size);
+        System.out.println("tree num:" + size);
         int treeNum = 5;
         //TrieDictionaryForestBuilder<String> builder = 
newDictBuilder(set.iterator(),0,size / treeNum);
         //TrieDictionaryForest<String> dict = builder.build();
-        NumberDictionaryForestBuilder<String> builder = new 
NumberDictionaryForestBuilder<String>(new StringBytesConverter(),0);
-        builder.setMaxTrieSize(size / treeNum);
+        TrieDictionaryForestBuilder builder = new 
NumberDictionaryForestBuilder(0);
+        builder.setMaxTrieTreeSize(size / treeNum);
         Iterator<String> it = set.iterator();
-        while(it.hasNext())
+        while (it.hasNext())
             builder.addValue(it.next());
-        NumberDictionaryForest<String> dict = builder.build();
-        System.out.println(dict.getTreeSize());
+        TrieDictionaryForest<String> dict = builder.build();
+        System.out.println(dict.getTrees().size());
 
         int testTimes = 100 * 10000;
         Random rand = new Random(System.currentTimeMillis());
         //test roundingFlag > 0
-        for(int i=0;i<testTimes;i++)
-        {
-            String query = rand.nextInt(2*num)+"";
+        for (int i = 0; i < testTimes; i++) {
+            String query = rand.nextInt(2 * num) + "";
             try {
                 int id = dict.getIdFromValue(query, 1);
-                assertEquals(set.ceiling(query),dict.getValueFromId(id));
-            }catch(IllegalArgumentException e){
+                assertEquals(set.ceiling(query), dict.getValueFromId(id));
+            } catch (IllegalArgumentException e) {
                 assertNull(set.ceiling(query));
             }
         }
 
-
         //test roundingFlag < 0
-        for(int i=0;i<testTimes;i++)
-        {
-            String query = rand.nextInt(2*num)+"";
+        for (int i = 0; i < testTimes; i++) {
+            String query = rand.nextInt(2 * num) + "";
             try {
                 int id = dict.getIdFromValue(query, -1);
-                assertEquals(set.floor(query),dict.getValueFromId(id));
-            }catch(IllegalArgumentException e){
+                assertEquals(set.floor(query), dict.getValueFromId(id));
+            } catch (IllegalArgumentException e) {
                 assertNull(set.floor(query));
             }
         }
@@ -480,15 +485,15 @@ public class TrieDictionaryForestTest {
      */
     @Ignore
     @Test
-    public void doubleDictRoundingFlagTest(){
+    public void doubleDictRoundingFlagTest() {
         TreeSet<String> set = new TreeSet<>(new Comparator<String>() {
             @Override
             public int compare(String o1, String o2) {
-                try{
+                try {
                     Double d1 = Double.parseDouble(o1);
                     Double d2 = Double.parseDouble(o2);
                     return d1.compareTo(d2);
-                }catch(NumberFormatException e){
+                } catch (NumberFormatException e) {
                     e.printStackTrace();
                     return 0;
                 }
@@ -498,9 +503,8 @@ public class TrieDictionaryForestTest {
         double k = -0.0;
         int size = 0;
         StringBytesConverter converter = new StringBytesConverter();
-        for(int i=0;i<num;i++)
-        {
-            String value = k+"";
+        for (int i = 0; i < num; i++) {
+            String value = k + "";
             set.add(value);
             k += 1.55;
             String basic = "-9999999999999952517";
@@ -509,21 +513,20 @@ public class TrieDictionaryForestTest {
         int treeNum = 5;
         //TrieDictionaryForestBuilder<String> builder = 
newDictBuilder(set.iterator(),0,size / treeNum);
         //TrieDictionaryForest<String> dict = builder.build();
-        NumberDictionaryForestBuilder<String> builder = new 
NumberDictionaryForestBuilder<String>(new StringBytesConverter(),0);
-        builder.setMaxTrieSize(size / treeNum);
+        NumberDictionaryForestBuilder builder = new 
NumberDictionaryForestBuilder(0);
+        builder.setMaxTrieTreeSize(size / treeNum);
         Iterator<String> it = set.iterator();
-        while(it.hasNext()){
+        while (it.hasNext()) {
             String str = it.next();
-            if(str.contains("E")){
+            if (str.contains("E")) {
                 set.remove(str);
-            }
-            else{
+            } else {
                 builder.addValue(str);
             }
         }
 
-        NumberDictionaryForest<String> dict = builder.build();
-        System.out.println("tree size:"+dict.getTreeSize());
+        TrieDictionaryForest<String> dict = builder.build();
+        System.out.println("tree size:" + dict.getTrees().size());
         System.out.println("--------------dict-----------------");
         dict.dump(System.out);
         System.out.println("--------------set-------------------");
@@ -531,7 +534,7 @@ public class TrieDictionaryForestTest {
 
         //test special value
         String query1 = "183.82499999999996";
-        int id1 = dict.getIdFromValue(query1,1);
+        int id1 = dict.getIdFromValue(query1, 1);
         String actualValue = dict.getValueFromId(id1);
         //System.out.println("id:"+id1+"  value:"+actualValue);
         //System.out.println(set.ceiling(query1));
@@ -540,40 +543,36 @@ public class TrieDictionaryForestTest {
         int testTimes = 1000000;
         double queryBasic = -145.355;
         //test roundingFlag > 0
-        for(int i=0;i<testTimes;i++)
-        {
-            String query = queryBasic+"";
+        for (int i = 0; i < testTimes; i++) {
+            String query = queryBasic + "";
             //System.out.println("query:"+query);
             queryBasic += 1.51;
-            if(query.contains("E"))
+            if (query.contains("E"))
                 continue;
             try {
                 int id = dict.getIdFromValue(query, 1);
-                assertEquals(set.ceiling(query),dict.getValueFromId(id));
-            }catch(IllegalArgumentException e){
+                assertEquals(set.ceiling(query), dict.getValueFromId(id));
+            } catch (IllegalArgumentException e) {
                 assertNull(set.ceiling(query));
             }
         }
 
-
         //test roundingFlag < 0
         queryBasic = -551.3588;
-        for(int i=0;i<testTimes;i++)
-        {
-            String query = queryBasic+"";
+        for (int i = 0; i < testTimes; i++) {
+            String query = queryBasic + "";
             queryBasic += 1.0;
-            if(query.contains("E"))
+            if (query.contains("E"))
                 continue;
             try {
                 int id = dict.getIdFromValue(query, -1);
-                assertEquals(set.floor(query),dict.getValueFromId(id));
-            }catch(IllegalArgumentException e){
+                assertEquals(set.floor(query), dict.getValueFromId(id));
+            } catch (IllegalArgumentException e) {
                 assertNull(set.floor(query));
             }
         }
     }
 
-
     private static TrieDictionaryForest<String> 
testSerialize(TrieDictionaryForest<String> dict) {
         try {
             ByteArrayOutputStream bout = new ByteArrayOutputStream();
@@ -603,7 +602,7 @@ public class TrieDictionaryForestTest {
         dict.dump(System.out);
         byte[] data = converter.convertToBytes(value);
         int id = dict.getIdFromValueBytes(data,0,data.length);
-
+    
     }*/
 
     /*
@@ -612,14 +611,14 @@ public class TrieDictionaryForestTest {
     @Ignore
     @Test
     public void memoryUsageBenchmarkOldDictTest() throws Exception {
-        System.out.println("max memory:"+Runtime.getRuntime().maxMemory());
+        System.out.println("max memory:" + Runtime.getRuntime().maxMemory());
         System.gc();
         Thread.currentThread().sleep(1000);
         NumberDictionaryBuilder<String> b = new NumberDictionaryBuilder<>(new 
StringBytesConverter());
         int k = 0;
-        while(true){
-            b.addValue(k+"");
-            if(k%100000 == 0)
+        while (true) {
+            b.addValue(k + "");
+            if (k % 100000 == 0)
                 System.out.println(k);
             k++;
         }
@@ -629,14 +628,14 @@ public class TrieDictionaryForestTest {
     @Ignore
     @Test
     public void memoryUsageBenchmarkNewDictForestTest() throws Exception {
-        System.out.println("max memory:"+Runtime.getRuntime().maxMemory());
+        System.out.println("max memory:" + Runtime.getRuntime().maxMemory());
         System.gc();
         Thread.currentThread().sleep(3000);
-        NumberDictionaryForestBuilder<String> b = new 
NumberDictionaryForestBuilder<>(new StringBytesConverter(),0,0);
+        NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(0, 
0);
         int k = 0;
-        while(true){
-            b.addValue(k+"");
-            if(k%100000 == 0)
+        while (true) {
+            b.addValue(k + "");
+            if (k % 100000 == 0)
                 System.out.println(k);
             k++;
         }
@@ -648,12 +647,11 @@ public class TrieDictionaryForestTest {
         maxTrieSize:50M  entry:128400000
         maxTrieSize:25M  entry:148100000
         maxTrieSize:0M  entry: 5000000
-
+        
         5-8
          */
     }
 
-
     @Deprecated
     private long getSystemCurUsedMemory() throws Exception {
         System.gc();
@@ -703,19 +701,15 @@ public class TrieDictionaryForestTest {
             System.out.println("times:" + i);
         }
 
-
         System.out.println("compare build time.  Old trie : " + 
oldDictTotalBuildTime / 1000.0 + "s.New trie : " + newDictTotalBuildTime / 
1000.0 + "s");
     }
 
-
     @Test
     public void queryTimeBenchmarkTest() throws Exception {
         int count = (int) (Integer.MAX_VALUE * 0.8 / 640);
-        //int count = (int) (2);
         benchmarkStringDictionary(new RandomStrings(count));
     }
 
-
     private void evaluateDataSize(ArrayList<String> list) {
         long size = 0;
         for (String str : list)
@@ -763,7 +757,6 @@ public class TrieDictionaryForestTest {
             array[id] = converter.convertToBytes(value);
         }
 
-
         // System.out.println("Dict size in bytes:  " +
         //MemoryUtil.deepMemoryUsageOf(dict));
         // System.out.println("Map size in bytes:   " +
@@ -879,7 +872,7 @@ public class TrieDictionaryForestTest {
             }
         }
         int maxId = dict.getMaxId();
-        int[] notExistIds = {-10, -20, -Integer.MIN_VALUE, -Integer.MAX_VALUE, 
maxId + 1, maxId + 2};
+        int[] notExistIds = { -10, -20, -Integer.MIN_VALUE, 
-Integer.MAX_VALUE, maxId + 1, maxId + 2 };
         for (Integer i : notExistIds) {
             try {
                 dict.getValueFromId(i);
@@ -925,7 +918,7 @@ public class TrieDictionaryForestTest {
     public static TrieDictionaryForestBuilder<String> 
newDictBuilder(Iterator<String> strs, int baseId, int treeSize) {
         TrieDictionaryForestBuilder<String> b = new 
TrieDictionaryForestBuilder<String>(new StringBytesConverter(), baseId);
         b.setMaxTrieTreeSize(treeSize);
-        while(strs.hasNext())
+        while (strs.hasNext())
             b.addValue(strs.next());
         return b;
     }
@@ -1000,7 +993,6 @@ public class TrieDictionaryForestTest {
         return r;
     }
 
-
     private ArrayList<String> getTestData(int count) {
         RandomStrings rs = new RandomStrings(count);
         Iterator<String> itr = rs.iterator();

http://git-wip-us.apache.org/repos/asf/kylin/blob/0a0c5547/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
index 66946b7..33dca01 100644
--- 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
+++ 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
@@ -20,9 +20,9 @@ import org.apache.hadoop.io.Text;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.dict.NumberDictionary;
 import org.apache.kylin.dict.NumberDictionaryBuilder;
-import org.apache.kylin.dict.NumberDictionaryForest;
 import org.apache.kylin.dict.NumberDictionaryForestBuilder;
 import org.apache.kylin.dict.StringBytesConverter;
+import org.apache.kylin.dict.TrieDictionaryForest;
 import org.apache.kylin.engine.mr.steps.fdc2.SelfDefineSortableKey;
 import org.apache.kylin.engine.mr.steps.fdc2.TypeFlag;
 import org.junit.Test;
@@ -51,14 +51,13 @@ public class NumberDictionaryForestTest {
         ArrayList<SelfDefineSortableKey> keyList = createKeyList(list, (byte) 
flag.ordinal());
         Collections.sort(keyList);
         //build tree
-        NumberDictionaryForestBuilder<String> b = new 
NumberDictionaryForestBuilder<String>(
-                new StringBytesConverter(), 0, 0);
+        NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(0, 
0);
 
         for (SelfDefineSortableKey key : keyList) {
             String fieldValue = printKey(key);
             b.addValue(fieldValue);
         }
-        NumberDictionaryForest<String> dict = b.build();
+        TrieDictionaryForest<String> dict = b.build();
         dict.dump(System.out);
         ArrayList<Integer> resultIds = new ArrayList<>();
         for (SelfDefineSortableKey key : keyList) {
@@ -81,10 +80,10 @@ public class NumberDictionaryForestTest {
         testData.add("2");
         testData.add("100");
         //TrieDictionaryForestBuilder.MaxTrieTreeSize = 0;
-        NumberDictionaryForestBuilder<String> b = new 
NumberDictionaryForestBuilder<String>(new StringBytesConverter());
+        NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder();
         for (String str : testData)
             b.addValue(str);
-        NumberDictionaryForest<String> dict = b.build();
+        TrieDictionaryForest<String> dict = b.build();
         dict = testSerialize(dict);
         dict.dump(System.out);
         for (String str : testData) {
@@ -99,10 +98,10 @@ public class NumberDictionaryForestTest {
         testData.add(Double.MIN_VALUE + "");
         testData.add("1.01");
         testData.add("2.0");
-        NumberDictionaryForestBuilder<String> b = new 
NumberDictionaryForestBuilder<String>(new StringBytesConverter());
+        NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder();
         for (String str : testData)
             b.addValue(str);
-        NumberDictionaryForest<String> dict = b.build();
+        TrieDictionaryForest<String> dict = b.build();
         dict.dump(System.out);
 
         NumberDictionaryBuilder<String> b2 = new NumberDictionaryBuilder<>(new 
StringBytesConverter());
@@ -113,7 +112,7 @@ public class NumberDictionaryForestTest {
 
     }
 
-    private static NumberDictionaryForest<String> 
testSerialize(NumberDictionaryForest<String> dict) {
+    private static TrieDictionaryForest<String> 
testSerialize(TrieDictionaryForest<String> dict) {
         try {
             ByteArrayOutputStream bout = new ByteArrayOutputStream();
             DataOutputStream dataout = new DataOutputStream(bout);
@@ -121,7 +120,7 @@ public class NumberDictionaryForestTest {
             dataout.close();
             ByteArrayInputStream bin = new 
ByteArrayInputStream(bout.toByteArray());
             DataInputStream datain = new DataInputStream(bin);
-            NumberDictionaryForest<String> r = new NumberDictionaryForest<>();
+            TrieDictionaryForest<String> r = new TrieDictionaryForest<>();
             //r.dump(System.out);
             r.readFields(datain);
             //r.dump(System.out);

http://git-wip-us.apache.org/repos/asf/kylin/blob/0a0c5547/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/SelfDefineSortableKeyTest.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/SelfDefineSortableKeyTest.java
 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/SelfDefineSortableKeyTest.java
index 858bba4..81aa836 100644
--- 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/SelfDefineSortableKeyTest.java
+++ 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/SelfDefineSortableKeyTest.java
@@ -1,31 +1,21 @@
 package org.apache.kylin.engine.mr.steps;
 
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.io.Text;
-import org.apache.kylin.common.util.Array;
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.dict.NumberDictionaryForest;
-import org.apache.kylin.dict.NumberDictionaryForestBuilder;
-import org.apache.kylin.dict.StringBytesConverter;
-import org.apache.kylin.dict.TrieDictionary;
-import org.apache.kylin.dict.TrieDictionaryBuilder;
-import org.apache.kylin.dict.TrieDictionaryForest;
-import org.apache.kylin.dict.TrieDictionaryForestBuilder;
-import org.apache.kylin.engine.mr.steps.fdc2.SelfDefineSortableKey;
-import org.apache.kylin.engine.mr.steps.fdc2.TypeFlag;
-import org.junit.Test;
-import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Random;
 import java.util.UUID;
 
+import org.apache.hadoop.io.Text;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.engine.mr.steps.fdc2.SelfDefineSortableKey;
+import org.apache.kylin.engine.mr.steps.fdc2.TypeFlag;
+import org.junit.Test;
+
 /**
  * Created by xiefan on 16-11-2.
  */

Reply via email to