KYLIN-2243 TopN memory estimation is inaccurate in some cases
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/ab657d1f Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/ab657d1f Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/ab657d1f Branch: refs/heads/master-hbase0.98 Commit: ab657d1f77a0a5edd0e6b12f496b5c2d86c74849 Parents: c93ea9c Author: shaofengshi <shaofeng...@apache.org> Authored: Tue Feb 14 16:45:16 2017 +0800 Committer: shaofengshi <shaofeng...@apache.org> Committed: Tue Feb 14 16:45:16 2017 +0800 ---------------------------------------------------------------------- .../org/apache/kylin/cube/CubeDescManager.java | 54 ++++++++++++++++++++ .../measure/topn/TopNCounterSerializer.java | 10 +++- .../kylin/metadata/datatype/DataType.java | 1 + .../kylin/metadata/model/FunctionDesc.java | 4 ++ .../template/cube_desc/kylin_sales_cube.json | 2 +- .../localmeta/cube_desc/ci_inner_join_cube.json | 2 +- .../localmeta/cube_desc/ci_left_join_cube.json | 2 +- 7 files changed, 70 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java b/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java index 50312bf..dfc8f73 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/CubeDescManager.java @@ -21,8 +21,10 @@ package org.apache.kylin.cube; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.commons.lang3.StringUtils; import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.persistence.JsonSerializer; import org.apache.kylin.common.persistence.ResourceStore; @@ -31,10 +33,17 @@ import org.apache.kylin.cube.cuboid.Cuboid; import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.cube.model.validation.CubeMetadataValidator; import org.apache.kylin.cube.model.validation.ValidateContext; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.dimension.DimensionEncoding; +import org.apache.kylin.dimension.DimensionEncodingFactory; +import org.apache.kylin.measure.topn.TopNMeasureType; import org.apache.kylin.metadata.MetadataConstants; import org.apache.kylin.metadata.cachesync.Broadcaster; import org.apache.kylin.metadata.cachesync.Broadcaster.Event; import org.apache.kylin.metadata.cachesync.CaseInsensitiveStringCache; +import org.apache.kylin.metadata.datatype.DataType; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.ParameterDesc; import org.apache.kylin.metadata.project.ProjectInstance; import org.apache.kylin.metadata.project.ProjectManager; import org.apache.kylin.metadata.realization.IRealization; @@ -207,6 +216,7 @@ public class CubeDescManager { logger.warn("Broken cube desc " + cubeDesc, e); cubeDesc.addError(e.getMessage()); } + postProcessCubeDesc(cubeDesc); // Check base validation if (!cubeDesc.getError().isEmpty()) { return cubeDesc; @@ -227,6 +237,49 @@ public class CubeDescManager { return cubeDesc; } + + /** + * if there is some change need be applied after getting a cubeDesc from front-end, do it here + * @param cubeDesc + */ + private void postProcessCubeDesc(CubeDesc cubeDesc) { + for (MeasureDesc measureDesc : cubeDesc.getMeasures()) { + if (TopNMeasureType.FUNC_TOP_N.equalsIgnoreCase(measureDesc.getFunction().getExpression())) { + // update return type scale with the estimated key length + Map<String, String> configuration = measureDesc.getFunction().getConfiguration(); + ParameterDesc parameter = measureDesc.getFunction().getParameter(); + parameter = parameter.getNextParameter(); + int keyLength = 0; + while (parameter != null) { + String encoding = configuration.get(TopNMeasureType.CONFIG_ENCODING_PREFIX + parameter.getValue()); + String encodingVersionStr = configuration.get(TopNMeasureType.CONFIG_ENCODING_VERSION_PREFIX + parameter.getValue()); + if (StringUtils.isEmpty(encoding) || DictionaryDimEnc.ENCODING_NAME.equals(encoding)) { + keyLength += 6; // estimation for dict encoding + } else { + // non-dict encoding + int encodingVersion = 1; + if (!StringUtils.isEmpty(encodingVersionStr)) { + try { + encodingVersion = Integer.parseInt(encodingVersionStr); + } catch (NumberFormatException e) { + throw new RuntimeException("invalid encoding version: " + encodingVersionStr); + } + } + Object[] encodingConf = DimensionEncoding.parseEncodingConf(encoding); + DimensionEncoding dimensionEncoding = DimensionEncodingFactory.create((String) encodingConf[0], (String[]) encodingConf[1], encodingVersion); + keyLength += dimensionEncoding.getLengthOfEncoding(); + } + + parameter = parameter.getNextParameter(); + } + + DataType returnType = DataType.getType(measureDesc.getFunction().getReturnType()); + DataType newReturnType = new DataType(returnType.getName(), returnType.getPrecision(), keyLength); + measureDesc.getFunction().setReturnType(newReturnType.toString()); + } + } + } + // remove cubeDesc public void removeCubeDesc(CubeDesc cubeDesc) throws IOException { String path = cubeDesc.getResourcePath(); @@ -291,6 +344,7 @@ public class CubeDescManager { return desc; } + postProcessCubeDesc(desc); // Semantic validation CubeMetadataValidator validator = new CubeMetadataValidator(); ValidateContext context = validator.validate(desc); http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java index cef9177..c35bd30 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounterSerializer.java @@ -35,8 +35,14 @@ public class TopNCounterSerializer extends DataTypeSerializer<TopNCounter<ByteAr private int precision; + private int scale; + public TopNCounterSerializer(DataType dataType) { this.precision = dataType.getPrecision(); + this.scale = dataType.getScale(); + if (scale < 0) { + scale = 6; + } } @Override @@ -54,12 +60,12 @@ public class TopNCounterSerializer extends DataTypeSerializer<TopNCounter<ByteAr @Override public int maxLength() { - return Math.max(precision * TopNCounter.EXTRA_SPACE_RATE * (4 + 8), 1024 * 1024); // use at least 1M + return Math.max(precision * TopNCounter.EXTRA_SPACE_RATE * (scale + 8), 1024 * 1024); // use at least 1M } @Override public int getStorageBytesEstimate() { - return precision * TopNCounter.EXTRA_SPACE_RATE * 8; + return precision * TopNCounter.EXTRA_SPACE_RATE * (scale + 8); } @Override http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java index d3756b8..83b2391 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java @@ -186,6 +186,7 @@ public class DataType implements Serializable { precision = 19; scale = 4; } + } private String replaceLegacy(String str) { http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java index a49d982..cbd7574 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java @@ -228,6 +228,10 @@ public class FunctionDesc implements Serializable { return returnType; } + public void setReturnType(String returnType) { + this.returnType = returnType; + } + public DataType getReturnDataType() { return returnDataType; } http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/examples/sample_cube/template/cube_desc/kylin_sales_cube.json ---------------------------------------------------------------------- diff --git a/examples/sample_cube/template/cube_desc/kylin_sales_cube.json b/examples/sample_cube/template/cube_desc/kylin_sales_cube.json index 2a27305..1ad73bb 100644 --- a/examples/sample_cube/template/cube_desc/kylin_sales_cube.json +++ b/examples/sample_cube/template/cube_desc/kylin_sales_cube.json @@ -159,7 +159,7 @@ "next_parameter" : null } }, - "returntype" : "topn(100)" + "returntype" : "topn(100, 6)" } } ], "rowkey" : { http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json index 99013ce..7e14d51 100644 --- a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json +++ b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json @@ -182,7 +182,7 @@ "value" : "TEST_KYLIN_FACT.SELLER_ID" } }, - "returntype" : "topn(100)", + "returntype" : "topn(100, 4)", "configuration": {"topn.encoding.SELLER_ID" : "int:4"} } }, { http://git-wip-us.apache.org/repos/asf/kylin/blob/ab657d1f/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json index 51139ae..b59d6df 100644 --- a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json +++ b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json @@ -182,7 +182,7 @@ "value" : "TEST_KYLIN_FACT.SELLER_ID" } }, - "returntype" : "topn(100)", + "returntype" : "topn(100, 4)", "configuration": {"topn.encoding.SELLER_ID" : "int:4"} } }, {