KYLIN-2249 Bug fix: cube build fails when using inmem but succeeds with layer Signed-off-by: Yang Li <liy...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/4d52147e Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/4d52147e Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/4d52147e Branch: refs/heads/v2.0.0-release-hbase0.98 Commit: 4d52147e81e2581e39256e56680fd06ca1d4963c Parents: 6c376b2 Author: xiefan46 <958034...@qq.com> Authored: Tue Apr 25 18:10:09 2017 +0800 Committer: Yang Li <liy...@apache.org> Committed: Tue Apr 25 20:17:58 2017 +0800 ---------------------------------------------------------------------- .../kylin/dict/TrieDictionaryBuilder.java | 33 +++++++++++++------- .../apache/kylin/dict/TrieDictionaryForest.java | 14 ++++----- .../kylin/dict/TrieDictionaryForestBuilder.java | 2 +- .../kylin/dict/TrieDictionaryForestTest.java | 11 +++++++ .../apache/kylin/dict/TrieDictionaryTest.java | 10 ++++++ .../kylin/dimension/DictionaryDimEnc.java | 2 +- 6 files changed, 51 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java index 102c49e..1750ac1 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java @@ -34,11 +34,11 @@ import org.apache.kylin.common.util.BytesUtil; * Builds a dictionary using Trie structure. All values are taken in byte[] form * and organized in a Trie with ordering. Then numeric IDs are assigned in * sequence. 
- * + * * @author yangli9 */ public class TrieDictionaryBuilder<T> { - + private static final int _2GB = 2000000000; public static class Node { @@ -76,6 +76,8 @@ public class TrieDictionaryBuilder<T> { private Node root; protected BytesConverter<T> bytesConverter; + private boolean hasValue = false; + public TrieDictionaryBuilder(BytesConverter<T> bytesConverter) { this.root = new Node(new byte[0], false); this.bytesConverter = bytesConverter; @@ -91,6 +93,7 @@ public class TrieDictionaryBuilder<T> { } private void addValueR(Node node, byte[] value, int start) { + hasValue = true; // match the value part of current node int i = 0, j = start; int n = node.part.length, nn = value.length; @@ -179,10 +182,8 @@ public class TrieDictionaryBuilder<T> { public static class Stats { public int nValues; // number of values in total - public int nValueBytesPlain; // number of bytes for all values - // uncompressed - public int nValueBytesCompressed; // number of values bytes in Trie - // (compressed) + public int nValueBytesPlain; // number of bytes for all values uncompressed + public int nValueBytesCompressed; // number of values bytes in Trie (compressed) public int maxValueLength; // size of longest value in bytes // the trie is multi-byte-per-node @@ -234,7 +235,13 @@ public class TrieDictionaryBuilder<T> { } } - /** out print some statistics of the trie and the dictionary built from it */ + public boolean isHasValue() { + return hasValue; + } + + /** + * out print some statistics of the trie and the dictionary built from it + */ public Stats stats() { // calculate nEndValueBeneath traversePostOrder(new Visitor() { @@ -313,7 +320,9 @@ public class TrieDictionaryBuilder<T> { return s; } - /** out print trie for debug */ + /** + * out print trie for debug + */ public void print() { print(System.out); } @@ -396,11 +405,11 @@ public class TrieDictionaryBuilder<T> { /** * Flatten the trie into a byte array for a minimized memory footprint. * Lookup remains fast. 
Cost is inflexibility to modify (becomes immutable). - * + * <p> * Flattened node structure is HEAD + NODEs, for each node: * - o byte, offset to child node, o = stats.mbpn_sizeChildOffset - * - 1 bit, isLastChild flag, the 1st MSB of o - * - 1 bit, isEndOfValue flag, the 2nd MSB of o + * - 1 bit, isLastChild flag, the 1st MSB of o + * - 1 bit, isEndOfValue flag, the 2nd MSB of o * - c byte, number of values beneath, c = stats.mbpn_sizeNoValueBeneath * - 1 byte, number of value bytes * - n byte, value bytes @@ -417,7 +426,7 @@ public class TrieDictionaryBuilder<T> { Stats stats = stats(); int sizeNoValuesBeneath = stats.mbpn_sizeNoValueBeneath; int sizeChildOffset = stats.mbpn_sizeChildOffset; - + if (stats.mbpn_footprint <= 0) // must never happen, but let us be cautious throw new IllegalStateException("Too big dictionary, dictionary cannot be bigger than 2GB"); if (stats.mbpn_footprint > _2GB) http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java index 1023892..09d5bc2 100755 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java @@ -63,7 +63,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> { } public TrieDictionaryForest(ArrayList<TrieDictionary<T>> trees, ArrayList<ByteArray> valueDivide, // - ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) { + ArrayList<Integer> accuOffset, BytesConverter<T> bytesConverter, int baseId) { init(trees, valueDivide, accuOffset, bytesConverter, baseId); } @@ -342,7 +342,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> { 
initSizeOfValue(); } - private void initMaxValueForEachTrie(){ + private void initMaxValueForEachTrie() { //init max value this.maxValue = new ArrayList<>(); if (this.trees == null || trees.isEmpty()) { @@ -356,7 +356,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> { } } - private void initMaxId(){ + private void initMaxId() { if (trees.isEmpty()) { this.maxId = baseId - 1; return; @@ -365,7 +365,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> { this.maxId = accuOffset.get(index) + trees.get(index).getMaxId() + baseId; } - private void initMinId(){ + private void initMinId() { if (trees.isEmpty()) { this.minId = baseId; return; @@ -373,8 +373,8 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> { this.minId = trees.get(0).getMinId() + baseId; } - private void initSizeOfId(){ - if (trees.isEmpty()){ + private void initSizeOfId() { + if (trees.isEmpty()) { this.sizeOfId = 1; return; } @@ -383,7 +383,7 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> { this.sizeOfId = BytesUtil.sizeForValue(baseId + maxOffset + lastTree.getMaxId() + 1L); } - private void initSizeOfValue(){ + private void initSizeOfValue() { int maxValue = 0; for (TrieDictionary<T> tree : trees) maxValue = Math.max(maxValue, tree.getSizeOfValue()); http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java index 69da472..0e5e63e 100755 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java @@ -105,7 +105,7 @@ public class TrieDictionaryForestBuilder<T> { } public 
TrieDictionaryForest<T> build() { - if (curTreeSize != 0) { //last tree + if (trieBuilder.isHasValue()) { //last tree TrieDictionary<T> tree = trieBuilder.build(0); addTree(tree); reset(); http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java index dd1f951..82380b3 100755 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java @@ -133,6 +133,17 @@ public class TrieDictionaryForestTest { } @Test + public void testAllNullValue() { + ArrayList<String> strs = new ArrayList<String>(); + strs.add(""); + int maxTreeSize = 10; + TrieDictionaryForestBuilder<String> builder = newDictBuilder(strs, 0, maxTreeSize); + TrieDictionaryForest<String> dict = builder.build(); + assertEquals(1, dict.getSize()); + assertEquals(0, dict.getIdFromValue("")); + } + + @Test public void testBigDataSet() { //h=generate data ArrayList<String> strs = new ArrayList<>(); http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java index 22a93a0..13c83ac 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java @@ -221,6 +221,16 @@ public class TrieDictionaryTest { testStringDictionary(str, null); } + @Test + public void 
testAllNullValue() { + ArrayList<String> strs = new ArrayList<String>(); + strs.add(""); + TrieDictionaryBuilder<String> builder = newDictBuilder(strs); + TrieDictionary<String> dict = builder.build(0); + assertEquals(1, dict.getSize()); + assertEquals(0, dict.getIdFromValue("")); + } + private static void benchmarkStringDictionary(Iterable<String> str) throws IOException { TrieDictionaryBuilder<String> b = newDictBuilder(str); b.stats().print(); http://git-wip-us.apache.org/repos/asf/kylin/blob/4d52147e/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java index 46f9117..b022d84 100644 --- a/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java +++ b/core-metadata/src/main/java/org/apache/kylin/dimension/DictionaryDimEnc.java @@ -30,7 +30,7 @@ import org.apache.kylin.metadata.datatype.DataTypeSerializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class DictionaryDimEnc extends DimensionEncoding implements Serializable{ +public class DictionaryDimEnc extends DimensionEncoding implements Serializable { private static final long serialVersionUID = 1L; private static final Logger logger = LoggerFactory.getLogger(DictionaryDimEnc.class);