Repository: kylin
Updated Branches:
  refs/heads/yang-m1 d5b35bd17 -> 1f8ec67a5

minor, fix bug in HLLC for big cardinality

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/1f8ec67a
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/1f8ec67a
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/1f8ec67a

Branch: refs/heads/yang-m1
Commit: 1f8ec67a5d056ae857ef0bd7ec0374fb0e8ec14d
Parents: d5b35bd
Author: Yang Li <liy...@apache.org>
Authored: Tue Apr 26 22:56:48 2016 +0800
Committer: Yang Li <liy...@apache.org>
Committed: Tue Apr 26 22:57:29 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/measure/hllc/HyperLogLogPlusCounter.java | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kylin/blob/1f8ec67a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java
----------------------------------------------------------------------
diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java
index c153ec1..b5028d6 100644
--- a/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java
+++ b/core-metadata/src/main/java/org/apache/kylin/measure/hllc/HyperLogLogPlusCounter.java
@@ -171,16 +171,15 @@ public class HyperLogLogPlusCounter implements Serializable, Comparable<HyperLog
                     registerSum++;
                     zeroBuckets++;
                 } else {
-                    registerSum += 1.0 / (1 << registers[i]);
+                    registerSum += 1.0 / (1L << registers[i]);
                 }
             }
         }
 
         public long getCountEstimate() {
-            int m = (int) Math.pow(2, p);
-            double alpha = 1 / (2 * Math.log(2) * (1 + (3 * Math.log(2) - 1) / m));
-            double alphaMM = alpha * m * m;
-            double estimate = alphaMM / registerSum;
+            int m = 1 << p;
+            double alpha = 0.7213 / (1 + 1.079 / m);
+            double estimate = alpha * m * m / registerSum;
 
             // small cardinality adjustment
             if (zeroBuckets >= m * 0.07) { // (reference presto's HLL impl)