Repository: kylin Updated Branches: refs/heads/2.0-rc 3904a1767 -> 9f743fb78
KYLIN-1307 revisit growing dictionary Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/9f743fb7 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/9f743fb7 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/9f743fb7 Branch: refs/heads/2.0-rc Commit: 9f743fb7871d02f2c06e8fe21d288de1f502c939 Parents: 3904a17 Author: honma <ho...@ebay.com> Authored: Tue Jan 12 16:06:39 2016 +0800 Committer: honma <ho...@ebay.com> Committed: Tue Jan 12 16:06:39 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/KylinConfigBase.java | 5 +- .../apache/kylin/dict/DictionaryManager.java | 58 +++++++++++++++----- .../apache/kylin/rest/service/CacheService.java | 2 + 3 files changed, 51 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/9f743fb7/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index a5ca690..4c640f3 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -412,7 +412,6 @@ public class KylinConfigBase implements Serializable { return Integer.parseInt(getOptional("kylin.dict.cache.max.entry", "3000")); } - public boolean getQueryRunLocalCoprocessor() { return Boolean.parseBoolean(getOptional("kylin.query.run.local.coprocessor", "false")); } @@ -453,6 +452,10 @@ public class KylinConfigBase implements Serializable { return Integer.parseInt(this.getOptional("kylin.hbase.scan.cache_rows", "1024")); } + public boolean isGrowingDictEnabled() { + return Boolean.parseBoolean(this.getOptional("kylin.dict.growing.enabled", "false")); + } + public int getHBaseScanMaxResultSize() { return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB } http://git-wip-us.apache.org/repos/asf/kylin/blob/9f743fb7/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index b3cd634..cd12f01 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java @@ -139,26 +139,58 @@ public class DictionaryManager { initDictInfo(newDict, newDictInfo); - DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo); - if (largestDictInfo != null) { - largestDictInfo = getDictionaryInfo(largestDictInfo.getResourcePath()); - Dictionary<?> largestDictObject = largestDictInfo.getDictionaryObject(); - if (largestDictObject.contains(newDict)) { - logger.info("dictionary content " + newDict + ", is contained by dictionary at " + largestDictInfo.getResourcePath()); - return largestDictInfo; - } else if (newDict.contains(largestDictObject)) { - logger.info("dictionary content " + newDict + " is by far the largest, save it"); - return saveNewDict(newDictInfo); + if (KylinConfig.getInstanceFromEnv().isGrowingDictEnabled()) { + DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo); + if (largestDictInfo != null) { + largestDictInfo = getDictionaryInfo(largestDictInfo.getResourcePath()); + Dictionary<?> largestDictObject = largestDictInfo.getDictionaryObject(); + if (largestDictObject.contains(newDict)) { + logger.info("dictionary content " + newDict + ", is contained by dictionary at " + largestDictInfo.getResourcePath()); + return largestDictInfo; + } else if (newDict.contains(largestDictObject)) { + logger.info("dictionary content " + newDict + " is by far the largest, save it"); + return saveNewDict(newDictInfo); + } else { + logger.info("merge dict and save..."); + return mergeDictionary(Lists.newArrayList(newDictInfo, largestDictInfo)); + } } else { - logger.info("merge dict and save..."); - return mergeDictionary(Lists.newArrayList(newDictInfo, largestDictInfo)); + logger.info("first dict of this column, save it directly"); + return saveNewDict(newDictInfo); } } else { - logger.info("first dict of this column, save it directly"); + logger.info("Growing dict is not enabled"); + String dupDict = checkDupByContent(newDictInfo, newDict); + if (dupDict != null) { + logger.info("Identical dictionary content, reuse existing dictionary at " + dupDict); + return getDictionaryInfo(dupDict); + } + return saveNewDict(newDictInfo); } } + private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<?> dict) throws IOException { + ResourceStore store = MetadataManager.getInstance(config).getStore(); + ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); + if (existings == null) + return null; + + logger.info("{} existing dictionaries of the same column", existings.size()); + if (existings.size() > 100) { + logger.warn("Too many dictionaries under {}, dict count: {}", dictInfo.getResourceDir(), existings.size()); + } + + for (String existing : existings) { + DictionaryInfo existingInfo = getDictionaryInfo(existing); + if (existingInfo != null && dict.equals(existingInfo.getDictionaryObject())) { + return existing; + } + } + + return null; + } + private void initDictInfo(Dictionary<?> newDict, DictionaryInfo newDictInfo) { newDictInfo.setCardinality(newDict.getSize()); newDictInfo.setDictionaryObject(newDict); http://git-wip-us.apache.org/repos/asf/kylin/blob/9f743fb7/server/src/main/java/org/apache/kylin/rest/service/CacheService.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/kylin/rest/service/CacheService.java b/server/src/main/java/org/apache/kylin/rest/service/CacheService.java index 8227be6..c8bc941 100644 --- a/server/src/main/java/org/apache/kylin/rest/service/CacheService.java +++ b/server/src/main/java/org/apache/kylin/rest/service/CacheService.java @@ -38,6 +38,7 @@ import org.apache.kylin.common.restclient.Broadcaster; import org.apache.kylin.cube.CubeDescManager; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; +import org.apache.kylin.dict.DictionaryManager; import org.apache.kylin.engine.streaming.StreamingManager; import org.apache.kylin.invertedindex.IIDescManager; import org.apache.kylin.invertedindex.IIManager; @@ -203,6 +204,7 @@ public class CacheService extends BasicService { CubeDescManager.clearCache(); break; case ALL: + DictionaryManager.clearCache(); MetadataManager.clearCache(); CubeDescManager.clearCache(); CubeManager.clearCache();