Repository: kylin Updated Branches: refs/heads/KYLIN-2217-2 [created] fde30a157
KYLIN-2217 add saveDictionary() on CubeManager & DictionaryManager Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fde30a15 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fde30a15 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fde30a15 Branch: refs/heads/KYLIN-2217-2 Commit: fde30a157dff088b796cc778be34201a4e6e6dd4 Parents: 827205f Author: Li Yang <liy...@apache.org> Authored: Thu Nov 24 17:24:57 2016 +0800 Committer: Li Yang <liy...@apache.org> Committed: Thu Nov 24 17:24:57 2016 +0800 ---------------------------------------------------------------------- .../java/org/apache/kylin/cube/CubeManager.java | 47 +++++++------- .../kylin/cube/cli/DictionaryGeneratorCLI.java | 34 +++++++++- .../org/apache/kylin/cube/util/CubingUtils.java | 2 +- .../org/apache/kylin/dict/DictionaryInfo.java | 5 ++ .../apache/kylin/dict/DictionaryManager.java | 65 +++++++++----------- .../engine/mr/steps/MergeCuboidMapperTest.java | 2 +- .../kylin/cube/ITDictionaryManagerTest.java | 6 +- 7 files changed, 93 insertions(+), 68 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java index 9893040..3a4c754 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeManager.java @@ -48,7 +48,6 @@ import org.apache.kylin.cube.model.DictionaryDesc; import org.apache.kylin.cube.model.DimensionDesc; import org.apache.kylin.dict.DictionaryInfo; import org.apache.kylin.dict.DictionaryManager; -import org.apache.kylin.dict.DistinctColumnValuesProvider; import org.apache.kylin.dict.lookup.LookupStringTable; import org.apache.kylin.dict.lookup.SnapshotManager; import org.apache.kylin.dict.lookup.SnapshotTable; @@ -214,25 +213,39 @@ public class CubeManager implements IRealizationProvider { return result; } - public DictionaryInfo buildDictionary(CubeSegment cubeSeg, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) throws IOException { + public DictionaryInfo buildDictionary(CubeSegment cubeSeg, TblColRef col, ReadableTable inpTable) throws IOException { CubeDesc cubeDesc = cubeSeg.getCubeDesc(); if (!cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col)) return null; - DictionaryManager dictMgr = getDictionaryManager(); String builderClass = cubeDesc.getDictionaryBuilderClass(col); - DictionaryInfo dictInfo = dictMgr.buildDictionary(cubeDesc.getModel(), col, factTableValueProvider, builderClass); + DictionaryInfo dictInfo = getDictionaryManager().buildDictionary(cubeDesc.getModel(), col, inpTable, builderClass); + saveDictionaryInfo(cubeSeg, col, dictInfo); + return dictInfo; + } + + public DictionaryInfo saveDictionary(CubeSegment cubeSeg, TblColRef col, ReadableTable inpTable, Dictionary<String> dict) throws IOException { + CubeDesc cubeDesc = cubeSeg.getCubeDesc(); + if (!cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col)) + return null; + + DictionaryInfo dictInfo = getDictionaryManager().saveDictionary(cubeDesc.getModel(), col, inpTable, dict); + + saveDictionaryInfo(cubeSeg, col, dictInfo); + return dictInfo; + } + + private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo) throws IOException { if (dictInfo != null) { Dictionary<?> dict = dictInfo.getDictionaryObject(); cubeSeg.putDictResPath(col, dictInfo.getResourcePath()); cubeSeg.getRowkeyStats().add(new Object[] { col.getName(), dict.getSize(), dict.getSizeOfId() }); - CubeUpdate cubeBuilder = new CubeUpdate(cubeSeg.getCubeInstance()); - cubeBuilder.setToUpdateSegs(cubeSeg); - updateCube(cubeBuilder); + CubeUpdate update = new CubeUpdate(cubeSeg.getCubeInstance()); + update.setToUpdateSegs(cubeSeg); + updateCube(update); } - return dictInfo; } /** @@ -617,24 +630,6 @@ public class CubeManager implements IRealizationProvider { } } - private long calculateStartOffsetForAppendSegment(CubeInstance cube) { - List<CubeSegment> existing = cube.getSegments(); - if (existing.isEmpty()) { - return 0; - } else { - return existing.get(existing.size() - 1).getSourceOffsetEnd(); - } - } - - private long calculateStartDateForAppendSegment(CubeInstance cube) { - List<CubeSegment> existing = cube.getSegments(); - if (existing.isEmpty()) { - return cube.getDescriptor().getPartitionDateStart(); - } else { - return existing.get(existing.size() - 1).getDateRangeEnd(); - } - } - private void checkBuildingSegment(CubeInstance cube) { int maxBuldingSeg = cube.getConfig().getMaxBuildingSegments(); if (cube.getBuildingSegments().size() >= maxBuldingSeg) { http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java index 89e2e9b..a6aeb96 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java @@ -26,9 +26,15 @@ import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.CubeSegment; import org.apache.kylin.cube.model.DimensionDesc; +import org.apache.kylin.dict.DictionaryManager; import org.apache.kylin.dict.DistinctColumnValuesProvider; +import org.apache.kylin.metadata.MetadataManager; +import org.apache.kylin.metadata.model.DataModelDesc; +import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.source.ReadableTable; +import org.apache.kylin.source.SourceFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,7 +57,8 @@ public class DictionaryGeneratorCLI { // dictionary for (TblColRef col : cubeSeg.getCubeDesc().getAllColumnsNeedDictionaryBuilt()) { logger.info("Building dictionary for " + col); - cubeMgr.buildDictionary(cubeSeg, col, factTableValueProvider); + ReadableTable inpTable = decideInputTable(cubeSeg.getModel(), col, factTableValueProvider); + cubeMgr.buildDictionary(cubeSeg, col, inpTable); } // snapshot @@ -67,4 +74,29 @@ public class DictionaryGeneratorCLI { cubeMgr.buildSnapshotTable(cubeSeg, tableIdentity); } } + + private static ReadableTable decideInputTable(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) { + KylinConfig config = model.getConfig(); + DictionaryManager dictMgr = DictionaryManager.getInstance(config); + TblColRef srcCol = dictMgr.decideSourceData(model, col); + String srcTable = srcCol.getTable(); + + ReadableTable inpTable; + if (model.isFactTable(srcTable)) { + inpTable = factTableValueProvider.getDistinctValuesFor(srcCol); + } else { + MetadataManager metadataManager = MetadataManager.getInstance(config); + TableDesc tableDesc = new TableDesc(metadataManager.getTableDesc(srcTable)); + if (TableDesc.TABLE_TYPE_VIRTUAL_VIEW.equalsIgnoreCase(tableDesc.getTableType())) { + TableDesc materializedTbl = new TableDesc(); + materializedTbl.setDatabase(config.getHiveDatabaseForIntermediateTable()); + materializedTbl.setName(tableDesc.getMaterializedName()); + inpTable = SourceFactory.createReadableTable(materializedTbl); + } else { + inpTable = SourceFactory.createReadableTable(tableDesc); + } + } + + return inpTable; + } } http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java index 10dad2c..413b907 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java @@ -170,7 +170,7 @@ public class CubingUtils { signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath(String.format("streaming_%s_%s", startOffset, endOffset)); signature.setSize(endOffset - startOffset); - DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getTable(), tblColRef.getName(), tblColRef.getColumnDesc().getZeroBasedIndex(), tblColRef.getDatatype(), signature); + DictionaryInfo dictInfo = new DictionaryInfo(tblColRef.getColumnDesc(), tblColRef.getDatatype(), signature); logger.info("writing dictionary for TblColRef:" + tblColRef.toString()); DictionaryManager dictionaryManager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig()); try { http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java index 4fba59a..8526467 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryInfo.java @@ -21,6 +21,7 @@ package org.apache.kylin.dict; import org.apache.kylin.common.persistence.ResourceStore; import org.apache.kylin.common.persistence.RootPersistentEntity; import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.metadata.model.ColumnDesc; import org.apache.kylin.source.ReadableTable.TableSignature; import com.fasterxml.jackson.annotation.JsonAutoDetect; @@ -51,6 +52,10 @@ public class DictionaryInfo extends RootPersistentEntity { public DictionaryInfo() { } + public DictionaryInfo(ColumnDesc col, String dataType, TableSignature input) { + this(col.getTable().getIdentity(), col.getName(), col.getZeroBasedIndex(), dataType, input); + } + public DictionaryInfo(String sourceTable, String sourceColumn, int sourceColumnIndex, String dataType, TableSignature input) { this.updateRandomUuid(); http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index c33cd28..37e4757 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java @@ -38,12 +38,10 @@ import org.apache.kylin.metadata.MetadataManager; import org.apache.kylin.metadata.datatype.DataType; import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.JoinDesc; -import org.apache.kylin.metadata.model.TableDesc; import org.apache.kylin.metadata.model.TableRef; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.source.ReadableTable; import org.apache.kylin.source.ReadableTable.TableSignature; -import org.apache.kylin.source.SourceFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -273,45 +271,19 @@ public class DictionaryManager { } } - public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider) throws IOException { - return buildDictionary(model, col, factTableValueProvider, null); + public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable) throws IOException { + return buildDictionary(model, col, inpTable, null); } - public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, DistinctColumnValuesProvider factTableValueProvider, String builderClass) throws IOException { + public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, String builderClass) throws IOException { logger.info("building dictionary for " + col); - TblColRef srcCol = decideSourceData(model, col); - String srcTable = srcCol.getTable(); - String srcColName = srcCol.getName(); - int srcColIdx = srcCol.getColumnDesc().getZeroBasedIndex(); - - ReadableTable inpTable; - if (model.isFactTable(srcTable)) { - inpTable = factTableValueProvider.getDistinctValuesFor(srcCol); - } else { - MetadataManager metadataManager = MetadataManager.getInstance(config); - TableDesc tableDesc = new TableDesc(metadataManager.getTableDesc(srcTable)); - if (TableDesc.TABLE_TYPE_VIRTUAL_VIEW.equalsIgnoreCase(tableDesc.getTableType())) { - TableDesc materializedTbl = new TableDesc(); - materializedTbl.setDatabase(config.getHiveDatabaseForIntermediateTable()); - materializedTbl.setName(tableDesc.getMaterializedName()); - inpTable = SourceFactory.createReadableTable(materializedTbl); - } else { - inpTable = SourceFactory.createReadableTable(tableDesc); - } - } - - TableSignature inputSig = inpTable.getSignature(); - if (inputSig == null) // table does not exists - return null; - - DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcColName, srcColIdx, col.getDatatype(), inputSig); - - String dupDict = checkDupByInfo(dictInfo); - if (dupDict != null) { - logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict); - return getDictionaryInfo(dupDict); + DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable); + String dupInfo = checkDupByInfo(dictInfo); + if (dupInfo != null) { + logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo); + return getDictionaryInfo(dupInfo); } logger.info("Building dictionary object " + JsonUtil.writeValueAsString(dictInfo)); @@ -333,6 +305,27 @@ public class DictionaryManager { return trySaveNewDict(dictionary, dictInfo); } + public DictionaryInfo saveDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, Dictionary<String> dictionary) throws IOException { + DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable); + String dupInfo = checkDupByInfo(dictInfo); + if (dupInfo != null) { + logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupInfo); + return getDictionaryInfo(dupInfo); + } + + return trySaveNewDict(dictionary, dictInfo); + } + + private DictionaryInfo createDictionaryInfo(DataModelDesc model, TblColRef col, ReadableTable inpTable) throws IOException { + TblColRef srcCol = decideSourceData(model, col); + TableSignature inputSig = inpTable.getSignature(); + if (inputSig == null) // table does not exists + throw new IllegalStateException("Input table does not exist: " + inpTable); + + DictionaryInfo dictInfo = new DictionaryInfo(srcCol.getColumnDesc(), col.getDatatype(), inputSig); + return dictInfo; + } + /** * Decide a dictionary's source data, leverage PK-FK relationship. */ http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java index 6f3b7c9..075ec80 100644 --- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java +++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/MergeCuboidMapperTest.java @@ -119,7 +119,7 @@ public class MergeCuboidMapperTest extends LocalFileMetadataTestCase { signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath("fake_dict_for" + lfn.getName() + segment.getName()); - DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getTable(), lfn.getColumnDesc().getName(), lfn.getColumnDesc().getZeroBasedIndex(), "string", signature); + DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getColumnDesc(), "string", signature); List<String> values = new ArrayList<>(); values.add("aaa"); http://git-wip-us.apache.org/repos/asf/kylin/blob/fde30a15/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java b/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java index 22ffd26..188a97a 100644 --- a/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java +++ b/kylin-it/src/test/java/org/apache/kylin/cube/ITDictionaryManagerTest.java @@ -65,10 +65,10 @@ public class ITDictionaryManagerTest extends LocalFileMetadataTestCase { MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C"); - DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData); + DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData.getDistinctValuesFor(col)); System.out.println(JsonUtil.writeValueAsIndentString(info1)); - DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData); + DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupData.getDistinctValuesFor(col)); System.out.println(JsonUtil.writeValueAsIndentString(info2)); // test check duplicate @@ -89,7 +89,7 @@ public class ITDictionaryManagerTest extends LocalFileMetadataTestCase { // test empty dictionary MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider(); - DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupEmpty); + DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), col, mockupEmpty.getDistinctValuesFor(col)); System.out.println(JsonUtil.writeValueAsIndentString(info3)); assertEquals(0, info3.getCardinality()); assertEquals(0, info3.getDictionaryObject().getSize());