KYLIN-1379 More stable and functional precise count distinct implementation after KYLIN-1186
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d8898932 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d8898932 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d8898932 Branch: refs/heads/master Commit: d8898932aa338983d3ff9c460306bd59e99782e5 Parents: af5965c Author: sunyerui <sunye...@gmail.com> Authored: Fri May 20 19:14:24 2016 +0800 Committer: Yang Li <liy...@apache.org> Committed: Sun May 22 15:21:35 2016 +0800 ---------------------------------------------------------------------- .../kylin/measure/bitmap/BitmapMeasureType.java | 51 ++++++++++++++++---- .../kylin/metadata/model/FunctionDesc.java | 10 +++- ...t_kylin_cube_without_slr_left_join_desc.json | 17 ++----- .../query/sql_distinct_precisely/query00.sql | 2 +- .../query/sql_distinct_precisely/query01.sql | 2 +- .../query/sql_distinct_precisely/query02.sql | 2 +- .../query/sql_distinct_precisely/query03.sql | 2 +- .../query/sql_distinct_precisely/query04.sql | 2 +- .../query/sql_distinct_precisely/query05.sql | 2 +- .../query/sql_distinct_precisely/query06.sql | 2 +- .../query/sql_distinct_precisely/query07.sql | 2 +- webapp/app/js/model/cubeConfig.js | 2 +- 12 files changed, 61 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java index def3aee..da7b405 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java @@ -29,6 +29,7 @@ import 
org.apache.kylin.metadata.model.FunctionDesc; import org.apache.kylin.metadata.model.MeasureDesc; import org.apache.kylin.metadata.model.TblColRef; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -77,14 +78,8 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> { throw new IllegalArgumentException("BitmapMeasureType datatype is not " + DATATYPE_BITMAP + " but " + functionDesc.getReturnDataType().getName()); List<TblColRef> colRefs = functionDesc.getParameter().getColRefs(); - if (colRefs.size() != 1) { - throw new IllegalArgumentException("BitmapMeasureType col parameters count is not 1 but " + colRefs.size()); - } - - TblColRef colRef = colRefs.get(0); - DataType type = colRef.getType(); - if (!type.isIntegerFamily()) { - throw new IllegalArgumentException("BitmapMeasureType col type is not IntegerFamily but " + type.getName() + " of column " + colRef.getCanonicalName()); + if (colRefs.size() != 1 && colRefs.size() != 2) { + throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters, but has " + colRefs.size()); } } @@ -100,10 +95,23 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> { @Override public BitmapCounter valueOf(String[] values, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) { + List<TblColRef> literalCols = measureDesc.getFunction().getParameter().getColRefs(); + TblColRef literalCol = null; + if (literalCols.size() == 1) { + literalCol = literalCols.get(0); + } else if (literalCols.size() == 2) { + literalCol = literalCols.get(1); + } else { + throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters"); + } + Dictionary<String> dictionary = dictionaryMap.get(literalCol); BitmapCounter bitmap = current; bitmap.clear(); - for (String v : values) - bitmap.add(v); + // bitmap measure may have two values due to two parameters, only the first value should be ingested + if (values != null && values.length > 0 && values[0] != null) { + 
int id = dictionary.getIdFromValue(values[0]); + bitmap.add(id); + } return bitmap; } }; @@ -114,12 +122,35 @@ public class BitmapMeasureType extends MeasureType<BitmapCounter> { return new BitmapAggregator(); } + /** + * generate dict with first col by default, and with second col if specified + * + * Typical case: we have col uuid, and another col flag_uuid (if flag==1, uuid, null), + * the metrics count(distinct uuid) and count(distinct flag_uuid) should both generate dict with uuid, instead of uuid and flag_uuid + */ + @Override + public List<TblColRef> getColumnsNeedDictionary(FunctionDesc functionDesc) { + List<TblColRef> literalCols = functionDesc.getParameter().getColRefs(); + if (literalCols.size() == 1) { + return Collections.singletonList(literalCols.get(0)); + } else if (literalCols.size() == 2) { + return Collections.singletonList(literalCols.get(1)); + } else { + throw new IllegalArgumentException("Bitmap measure need 1 or 2 parameters"); + } + } + @Override public boolean needRewrite() { return true; } @Override + public boolean needCubeLevelDictionary() { + return true; + } + + @Override public Class<?> getRewriteCalciteAggrFunctionClass() { return BitmapDistinctCountAggFunc.class; } http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java index c85f0e8..e1a9e88 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java @@ -293,8 +293,14 @@ public class FunctionDesc { if (parameter == null) { if (other.parameter != null) return false; - } else if (!parameter.equals(other.parameter)) - return false; + } else { + if 
(isCountDistinct() + && (parameter.getType() == null ? other.parameter.getType() == null : parameter.getType().equals(other.parameter.getType())) + && (parameter.getValue() == null ? other.parameter.getType() == null : parameter.getValue().equals(other.parameter.getValue()))) + return true; + else if (!parameter.equals(other.parameter)) + return false; + } } return true; } http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json ---------------------------------------------------------------------- diff --git a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json index 0ba85d9..e835e06 100644 --- a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json +++ b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json @@ -104,7 +104,7 @@ }, "dependent_measure_ref" : null }, { - "name" : "SELLER_CNT_HLL", + "name" : "SELLER_CNT_BITMAP", "function" : { "expression" : "COUNT_DISTINCT", "parameter" : { @@ -112,7 +112,7 @@ "value" : "SELLER_ID", "next_parameter" : null }, - "returntype" : "hllc(10)" + "returntype" : "bitmap" }, "dependent_measure_ref" : null }, { @@ -132,17 +132,6 @@ }, "dependent_measure_ref" : null }, { - "name": "LEAF_CATEG_ID_BITMAP", - "function": { - "expression": "COUNT_DISTINCT", - "parameter": { - "type": "column", - "value": "LEAF_CATEG_ID" - }, - "returntype": "bitmap" - }, - "dependent_measure_ref": null - }, { "name" : "TOP_SELLER", "function" : { "expression" : "TOP_N", @@ -249,7 +238,7 @@ "name" : "f2", "columns" : [ { "qualifier" : "m", - "measure_refs" : [ "seller_cnt_hll", "seller_format_cnt", "leaf_categ_id_bitmap" ] + "measure_refs" : [ "seller_cnt_bitmap", "seller_format_cnt"] } ] }, { "name" : "f3", 
http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql index e1e4a9e..a3948c3 100644 --- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql @@ -19,6 +19,6 @@ select lstg_format_name, cal_dt, sum(price) as GMV, count(1) as TRANS_CNT, - count(distinct leaf_categ_id) as LEAF_CATEG_CNT + count(distinct seller_id) as seller_count from test_kylin_fact group by lstg_format_name, cal_dt http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql index c1868b8..e8579ef 100644 --- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql @@ -19,7 +19,7 @@ select lstg_format_name, sum(price) as GMV, count(1) as TRANS_CNT, - count(distinct leaf_categ_id) as LEAF_CATEG_CNT + count(distinct seller_id) as seller_count from test_kylin_fact where lstg_format_name='FP-GTC' group by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql index 5a3527a..48f49e9 100644 --- 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql @@ -19,7 +19,7 @@ select lstg_format_name, sum(price) as GMV, count(1) as TRANS_CNT, - count(distinct leaf_categ_id) as LEAF_CATEG_CNT + count(distinct seller_id) as seller_count from test_kylin_fact where lstg_format_name='FP-GTC' group by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql index dacdc87..3bf72f1 100644 --- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql @@ -17,7 +17,7 @@ -- select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV - , count(1) as TRANS_CNT, count(distinct test_kylin_fact.leaf_categ_id) as LEAF_CATEG_CNT + , count(1) as TRANS_CNT, count(distinct seller_id) as seller_count from test_kylin_fact inner JOIN edw.test_cal_dt as test_cal_dt ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql index ff511c3..b9fcff4 100644 --- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql @@ -17,7 +17,7 @@ -- select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV - , count(1) as TRANS_CNT, count(distinct test_kylin_fact.leaf_categ_id) as 
LEAF_CATEG_CNT + , count(1) as TRANS_CNT, count(distinct seller_id) as seller_count from test_kylin_fact inner JOIN edw.test_cal_dt as test_cal_dt ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql index 3d5e5e8..dea09f7 100644 --- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql @@ -19,7 +19,7 @@ select lstg_format_name, sum(price) as GMV, count(1) as TRANS_CNT, - count(distinct leaf_categ_id) as LEAF_CATEG_CNT + count(distinct seller_id) as seller_count from test_kylin_fact group by lstg_format_name order by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql index 858c92e..eb12620 100644 --- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql @@ -19,7 +19,7 @@ select lstg_format_name, sum(price) as GMV, count(1) as TRANS_CNT, - count(distinct leaf_categ_id) as LEAF_CATEG_CNT + count(distinct seller_id) as seller_count from test_kylin_fact where lstg_format_name='FP-GTC' group by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql ---------------------------------------------------------------------- diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql b/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql index 41252c4..9bd2663 100644 --- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql +++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql @@ -19,6 +19,6 @@ select lstg_format_name, sum(price) as GMV, count(1) as TRANS_CNT, - count(distinct leaf_categ_id) as LEAF_CATEG_CNT + count(distinct seller_id) as seller_count from test_kylin_fact group by lstg_format_name http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/webapp/app/js/model/cubeConfig.js ---------------------------------------------------------------------- diff --git a/webapp/app/js/model/cubeConfig.js b/webapp/app/js/model/cubeConfig.js index 784b081..962c65d 100644 --- a/webapp/app/js/model/cubeConfig.js +++ b/webapp/app/js/model/cubeConfig.js @@ -47,7 +47,7 @@ KylinApp.constant('cubeConfig', { {name: 'Error Rate < 2.44%', value: 'hllc14'}, {name: 'Error Rate < 1.72%', value: 'hllc15'}, {name: 'Error Rate < 1.22%', value: 'hllc16'}, - {name: 'Precisely (Only for Integer Family column)', value: 'bitmap'} + {name: 'Precisely (More Memory And Storage Needed)', value: 'bitmap'} ], topNTypes: [ {name: 'Top 10', value: "topn(10)"},