Repository: kylin
Updated Branches:
  refs/heads/master af5965ccb -> 444013b4b


KYLIN-1379 More stable and functional precise count distinct implementation after 
KYLIN-1186


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/d8898932
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/d8898932
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/d8898932

Branch: refs/heads/master
Commit: d8898932aa338983d3ff9c460306bd59e99782e5
Parents: af5965c
Author: sunyerui <sunye...@gmail.com>
Authored: Fri May 20 19:14:24 2016 +0800
Committer: Yang Li <liy...@apache.org>
Committed: Sun May 22 15:21:35 2016 +0800

----------------------------------------------------------------------
 .../kylin/measure/bitmap/BitmapMeasureType.java | 51 ++++++++++++++++----
 .../kylin/metadata/model/FunctionDesc.java      | 10 +++-
 ...t_kylin_cube_without_slr_left_join_desc.json | 17 ++-----
 .../query/sql_distinct_precisely/query00.sql    |  2 +-
 .../query/sql_distinct_precisely/query01.sql    |  2 +-
 .../query/sql_distinct_precisely/query02.sql    |  2 +-
 .../query/sql_distinct_precisely/query03.sql    |  2 +-
 .../query/sql_distinct_precisely/query04.sql    |  2 +-
 .../query/sql_distinct_precisely/query05.sql    |  2 +-
 .../query/sql_distinct_precisely/query06.sql    |  2 +-
 .../query/sql_distinct_precisely/query07.sql    |  2 +-
 webapp/app/js/model/cubeConfig.js               |  2 +-
 12 files changed, 61 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
----------------------------------------------------------------------
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
 
b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
index def3aee..da7b405 100644
--- 
a/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
+++ 
b/core-metadata/src/main/java/org/apache/kylin/measure/bitmap/BitmapMeasureType.java
@@ -29,6 +29,7 @@ import org.apache.kylin.metadata.model.FunctionDesc;
 import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
@@ -77,14 +78,8 @@ public class BitmapMeasureType extends 
MeasureType<BitmapCounter> {
             throw new IllegalArgumentException("BitmapMeasureType datatype is 
not " + DATATYPE_BITMAP + " but " + functionDesc.getReturnDataType().getName());
 
         List<TblColRef> colRefs = functionDesc.getParameter().getColRefs();
-        if (colRefs.size() != 1) {
-            throw new IllegalArgumentException("BitmapMeasureType col 
parameters count is not 1 but " + colRefs.size());
-        }
-
-        TblColRef colRef = colRefs.get(0);
-        DataType type = colRef.getType();
-        if (!type.isIntegerFamily()) {
-            throw new IllegalArgumentException("BitmapMeasureType col type is 
not IntegerFamily but " + type.getName() + " of column " + 
colRef.getCanonicalName());
+        if (colRefs.size() != 1 && colRefs.size() != 2) {
+            throw new IllegalArgumentException("Bitmap measure need 1 or 2 
parameters, but has " + colRefs.size());
         }
     }
 
@@ -100,10 +95,23 @@ public class BitmapMeasureType extends 
MeasureType<BitmapCounter> {
 
             @Override
             public BitmapCounter valueOf(String[] values, MeasureDesc 
measureDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
+                List<TblColRef> literalCols = 
measureDesc.getFunction().getParameter().getColRefs();
+                TblColRef literalCol = null;
+                if (literalCols.size() == 1) {
+                    literalCol = literalCols.get(0);
+                } else if (literalCols.size() == 2) {
+                    literalCol = literalCols.get(1);
+                } else {
+                    throw new IllegalArgumentException("Bitmap measure need 1 
or 2 parameters");
+                }
+                Dictionary<String> dictionary = dictionaryMap.get(literalCol);
                 BitmapCounter bitmap = current;
                 bitmap.clear();
-                for (String v : values)
-                    bitmap.add(v);
+                // bitmap measure may have two values due to two parameters, 
only the first value should be ingested
+                if (values != null && values.length > 0 && values[0] != null) {
+                    int id = dictionary.getIdFromValue(values[0]);
+                    bitmap.add(id);
+                }
                 return bitmap;
             }
         };
@@ -114,12 +122,35 @@ public class BitmapMeasureType extends 
MeasureType<BitmapCounter> {
         return new BitmapAggregator();
     }
 
+    /**
+     * Generate the dictionary from the first column by default, or from the 
second column if one is specified.
+     *
+     * Typical case: we have a column uuid and another column flag_uuid (uuid if 
flag==1, otherwise null);
+     * the metrics count(distinct uuid) and count(distinct flag_uuid) should 
both build their dictionary from uuid, rather than from uuid and flag_uuid separately.
+     */
+    @Override
+    public List<TblColRef> getColumnsNeedDictionary(FunctionDesc functionDesc) 
{
+        List<TblColRef> literalCols = functionDesc.getParameter().getColRefs();
+        if (literalCols.size() == 1) {
+            return Collections.singletonList(literalCols.get(0));
+        } else if (literalCols.size() == 2) {
+            return Collections.singletonList(literalCols.get(1));
+        } else {
+            throw new IllegalArgumentException("Bitmap measure need 1 or 2 
parameters");
+        }
+    }
+
     @Override
     public boolean needRewrite() {
         return true;
     }
 
     @Override
+    public boolean needCubeLevelDictionary() {
+        return true;
+    }
+
+    @Override
     public Class<?> getRewriteCalciteAggrFunctionClass() {
         return BitmapDistinctCountAggFunc.class;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
----------------------------------------------------------------------
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java 
b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
index c85f0e8..e1a9e88 100644
--- 
a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
+++ 
b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java
@@ -293,8 +293,14 @@ public class FunctionDesc {
             if (parameter == null) {
                 if (other.parameter != null)
                     return false;
-            } else if (!parameter.equals(other.parameter))
-                return false;
+            } else {
+                if (isCountDistinct()
+                        && (parameter.getType() == null ? 
other.parameter.getType() == null : 
parameter.getType().equals(other.parameter.getType()))
+                        && (parameter.getValue() == null ? 
other.parameter.getType() == null : 
parameter.getValue().equals(other.parameter.getValue())))
+                    return true;
+                else if (!parameter.equals(other.parameter))
+                    return false;
+            }
         }
         return true;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
----------------------------------------------------------------------
diff --git 
a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
 
b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
index 0ba85d9..e835e06 100644
--- 
a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
+++ 
b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
@@ -104,7 +104,7 @@
     },
     "dependent_measure_ref" : null
   }, {
-    "name" : "SELLER_CNT_HLL",
+    "name" : "SELLER_CNT_BITMAP",
     "function" : {
       "expression" : "COUNT_DISTINCT",
       "parameter" : {
@@ -112,7 +112,7 @@
         "value" : "SELLER_ID",
         "next_parameter" : null
       },
-      "returntype" : "hllc(10)"
+      "returntype" : "bitmap"
     },
     "dependent_measure_ref" : null
   }, {
@@ -132,17 +132,6 @@
     },
     "dependent_measure_ref" : null
   }, {
-    "name": "LEAF_CATEG_ID_BITMAP",
-    "function": {
-      "expression": "COUNT_DISTINCT",
-      "parameter": {
-        "type": "column",
-        "value": "LEAF_CATEG_ID"
-      },
-      "returntype": "bitmap"
-    },
-    "dependent_measure_ref": null
-  }, {
     "name" : "TOP_SELLER",
     "function" : {
       "expression" : "TOP_N",
@@ -249,7 +238,7 @@
       "name" : "f2",
       "columns" : [ {
         "qualifier" : "m",
-        "measure_refs" : [ "seller_cnt_hll", "seller_format_cnt", 
"leaf_categ_id_bitmap" ]
+        "measure_refs" : [ "seller_cnt_bitmap", "seller_format_cnt"]
       } ]
     }, {
       "name" : "f3",

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql
index e1e4a9e..a3948c3 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query00.sql
@@ -19,6 +19,6 @@
 select lstg_format_name, cal_dt,
  sum(price) as GMV,
  count(1) as TRANS_CNT,
- count(distinct leaf_categ_id) as LEAF_CATEG_CNT
+ count(distinct seller_id) as seller_count
  from test_kylin_fact
  group by lstg_format_name, cal_dt

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql
index c1868b8..e8579ef 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query01.sql
@@ -19,7 +19,7 @@
 select lstg_format_name,
  sum(price) as GMV,
  count(1) as TRANS_CNT,
- count(distinct leaf_categ_id) as LEAF_CATEG_CNT
+ count(distinct seller_id) as seller_count
  from test_kylin_fact
  where lstg_format_name='FP-GTC'
  group by lstg_format_name

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql
index 5a3527a..48f49e9 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query02.sql
@@ -19,7 +19,7 @@
 select lstg_format_name,
  sum(price) as GMV,
  count(1) as TRANS_CNT,
- count(distinct leaf_categ_id) as LEAF_CATEG_CNT
+ count(distinct seller_id) as seller_count
  from test_kylin_fact
  where lstg_format_name='FP-GTC'
  group by lstg_format_name

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql
index dacdc87..3bf72f1 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query03.sql
@@ -17,7 +17,7 @@
 --
 
 select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV
- , count(1) as TRANS_CNT, count(distinct test_kylin_fact.leaf_categ_id) as 
LEAF_CATEG_CNT
+ , count(1) as TRANS_CNT, count(distinct seller_id) as seller_count
  from test_kylin_fact
  inner JOIN edw.test_cal_dt as test_cal_dt
  ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql
index ff511c3..b9fcff4 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query04.sql
@@ -17,7 +17,7 @@
 --
 
 select test_cal_dt.week_beg_dt,sum(test_kylin_fact.price) as GMV
- , count(1) as TRANS_CNT, count(distinct test_kylin_fact.leaf_categ_id) as 
LEAF_CATEG_CNT
+ , count(1) as TRANS_CNT, count(distinct seller_id) as seller_count
  from test_kylin_fact
  inner JOIN edw.test_cal_dt as test_cal_dt
  ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql
index 3d5e5e8..dea09f7 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query05.sql
@@ -19,7 +19,7 @@
 select lstg_format_name,
  sum(price) as GMV,
  count(1) as TRANS_CNT,
- count(distinct leaf_categ_id) as LEAF_CATEG_CNT
+ count(distinct seller_id) as seller_count
  from test_kylin_fact
  group by lstg_format_name
  order by lstg_format_name

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql
index 858c92e..eb12620 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query06.sql
@@ -19,7 +19,7 @@
 select lstg_format_name,
  sum(price) as GMV,
  count(1) as TRANS_CNT,
- count(distinct leaf_categ_id) as LEAF_CATEG_CNT
+ count(distinct seller_id) as seller_count
  from test_kylin_fact
  where lstg_format_name='FP-GTC'
  group by lstg_format_name

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql
----------------------------------------------------------------------
diff --git 
a/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql 
b/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql
index 41252c4..9bd2663 100644
--- a/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql
+++ b/kylin-it/src/test/resources/query/sql_distinct_precisely/query07.sql
@@ -19,6 +19,6 @@
 select lstg_format_name,
  sum(price) as GMV,
  count(1) as TRANS_CNT,
- count(distinct leaf_categ_id) as LEAF_CATEG_CNT
+ count(distinct seller_id) as seller_count
  from test_kylin_fact
  group by lstg_format_name

http://git-wip-us.apache.org/repos/asf/kylin/blob/d8898932/webapp/app/js/model/cubeConfig.js
----------------------------------------------------------------------
diff --git a/webapp/app/js/model/cubeConfig.js 
b/webapp/app/js/model/cubeConfig.js
index 784b081..962c65d 100644
--- a/webapp/app/js/model/cubeConfig.js
+++ b/webapp/app/js/model/cubeConfig.js
@@ -47,7 +47,7 @@ KylinApp.constant('cubeConfig', {
     {name: 'Error Rate < 2.44%', value: 'hllc14'},
     {name: 'Error Rate < 1.72%', value: 'hllc15'},
     {name: 'Error Rate < 1.22%', value: 'hllc16'},
-    {name: 'Precisely (Only for Integer Family column)', value: 'bitmap'}
+    {name: 'Precisely (More Memory And Storage Needed)', value: 'bitmap'}
   ],
   topNTypes: [
     {name: 'Top 10', value: "topn(10)"},

Reply via email to