This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch 2.4.x in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 1bdb1c1b853a7db85ddd06004ec56574d0d83815 Author: Li Yang <liy...@apache.org> AuthorDate: Sat May 26 14:54:28 2018 +0800 KYLIN-2662 fix NegativeArraySizeException in TrieDictionaryForest Signed-off-by: shaofengshi <shaofeng...@apache.org> --- .../exceptions/TooBigDictionaryException.java | 34 ++++++++++++++++++++++ .../java/org/apache/kylin/dict/TrieDictionary.java | 3 ++ .../apache/kylin/dict/TrieDictionaryBuilder.java | 2 +- .../apache/kylin/dict/TrieDictionaryForest.java | 2 +- .../kylin/dict/TrieDictionaryForestBuilder.java | 17 +++++++++-- 5 files changed, 54 insertions(+), 4 deletions(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/exceptions/TooBigDictionaryException.java b/core-common/src/main/java/org/apache/kylin/common/exceptions/TooBigDictionaryException.java new file mode 100644 index 0000000..ab08c85 --- /dev/null +++ b/core-common/src/main/java/org/apache/kylin/common/exceptions/TooBigDictionaryException.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.common.exceptions; + +/** + * @author TinChiWay + * @date 2018/4/2 + */ +@SuppressWarnings("serial") +public class TooBigDictionaryException extends RuntimeException { + public TooBigDictionaryException(String message, Exception e) { + super(message, e); + } + + public TooBigDictionaryException(String message) { + super(message); + } +} diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java index 754f451..d531c05 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionary.java @@ -146,6 +146,9 @@ public class TrieDictionary<T> extends CacheDictionary<T> { return maxValueLength; } + public int getStorageSizeInBytes() { + return trieBytes.length; + } @Override protected int getIdFromValueBytesWithoutCache(byte[] value, int offset, int len, int roundingFlag) { diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java index 18169ca..b3440a1 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryBuilder.java @@ -39,7 +39,7 @@ import org.apache.kylin.common.util.BytesUtil; */ public class TrieDictionaryBuilder<T> { - private static final int _2GB = 2000000000; + public static final int _2GB = 2000000000; public static class Node { public byte[] part; diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java index 09d5bc2..4642cf4 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForest.java @@ -189,8 +189,8 @@ public class TrieDictionaryForest<T> extends CacheDictionary<T> { //write tree size headOut.writeInt(trees.size()); headOut.close(); - byte[] head = byteBuf.toByteArray(); //output + byte[] head = byteBuf.toByteArray(); out.writeInt(head.length); out.write(head); } diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java index 0e5e63e..29cff2e 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/TrieDictionaryForestBuilder.java @@ -17,13 +17,14 @@ */ package org.apache.kylin.dict; +import java.util.ArrayList; + import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.exceptions.TooBigDictionaryException; import org.apache.kylin.common.util.ByteArray; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; - /** * Build a trie dictionary forest if the input values is ordered, or the forest falls back to a single trie. */ @@ -134,6 +135,18 @@ public class TrieDictionaryForestBuilder<T> { byte[] valueBytes = tree.getValueBytesFromIdWithoutCache(minId); valueDivide.add(new ByteArray(valueBytes, 0, valueBytes.length)); curOffset += (tree.getMaxId() + 1); + + checkDictSize(); + } + + private void checkDictSize() { + // due to the limitation of resource store, no dictionary beyond 2GB is allowed + long size = 0; + for (TrieDictionary trie : trees) { + size += trie.getStorageSizeInBytes(); + } + if (size > TrieDictionaryBuilder._2GB) + throw new TooBigDictionaryException("Too big dictionary, dictionary cannot be bigger than 2GB"); } private void reset() {