Repository: kylin
Updated Branches:
  refs/heads/yang-m1 aebef6239 -> 51a7b012f


KYLIN-1623 Make the hll precision for data sampling configurable


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/51a7b012
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/51a7b012
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/51a7b012

Branch: refs/heads/yang-m1
Commit: 51a7b012f1b6d7a1a84f5b9df8918bebad9b7050
Parents: aebef62
Author: shaofengshi <shaofeng...@apache.org>
Authored: Tue Apr 26 18:43:14 2016 +0800
Committer: shaofengshi <shaofeng...@apache.org>
Committed: Tue Apr 26 18:45:53 2016 +0800

----------------------------------------------------------------------
 .../src/main/java/org/apache/kylin/common/KylinConfigBase.java  | 4 ++++
 .../src/main/java/org/apache/kylin/cube/util/CubingUtils.java   | 2 +-
 .../java/org/apache/kylin/engine/mr/common/CubeStatsReader.java | 5 ++---
 .../kylin/engine/mr/steps/FactDistinctColumnsReducer.java       | 4 +++-
 .../kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java    | 2 +-
 .../org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java   | 2 +-
 .../main/java/org/apache/kylin/engine/spark/SparkCubing.java    | 2 +-
 7 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/51a7b012/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git 
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java 
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 011a76b..c9d1b98 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -641,4 +641,8 @@ abstract public class KylinConfigBase implements 
Serializable {
     public int getDimCountDistinctMaxCardinality() {
         return Integer.parseInt(getOptional("kylin.query.dim.distinct.max", 
"5000000"));
     }
+
+    public int getCubeStatsHLLPrecision() {
+        return 
Integer.parseInt(getOptional("kylin.job.cubing.inmem.sampling.hll.precision", 
"14"));
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/51a7b012/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java 
b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
index c541326..ac81318 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
@@ -102,7 +102,7 @@ public class CubingUtils {
         });
         final Map<Long, HyperLogLogPlusCounter> result = 
Maps.newHashMapWithExpectedSize(allCuboidIds.size());
         for (Long cuboidId : allCuboidIds) {
-            result.put(cuboidId, new HyperLogLogPlusCounter(14));
+            result.put(cuboidId, new 
HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
             Integer[] cuboidBitSet = new Integer[Long.bitCount(cuboidId)];
 
             long mask = Long.highestOneBit(baseCuboidId);

http://git-wip-us.apache.org/repos/asf/kylin/blob/51a7b012/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index bac074b..44d5ce1 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -42,7 +42,6 @@ import org.apache.kylin.cube.cuboid.CuboidScheduler;
 import org.apache.kylin.cube.kv.CubeDimEncMap;
 import org.apache.kylin.cube.model.CubeDesc;
 import org.apache.kylin.engine.mr.HadoopUtil;
-import org.apache.kylin.engine.mr.steps.InMemCuboidJob;
 import org.apache.kylin.measure.hllc.HyperLogLogPlusCounter;
 import org.apache.kylin.metadata.datatype.DataType;
 import org.apache.kylin.metadata.model.MeasureDesc;
@@ -68,7 +67,7 @@ import java.util.Map;
  */
 public class CubeStatsReader {
 
-    private static final Logger logger = 
LoggerFactory.getLogger(InMemCuboidJob.class);
+    private static final Logger logger = 
LoggerFactory.getLogger(CubeStatsReader.class);
 
     final CubeSegment seg;
     final int samplingPercentage;
@@ -100,7 +99,7 @@ public class CubeStatsReader {
                 } else if (key.get() == -1) {
                     mapperOverlapRatio = Bytes.toDouble(value.getBytes());
                 } else {
-                    HyperLogLogPlusCounter hll = new 
HyperLogLogPlusCounter(14);
+                    HyperLogLogPlusCounter hll = new 
HyperLogLogPlusCounter(kylinConfig.getCubeStatsHLLPrecision());
                     ByteArray byteArray = new ByteArray(value.getBytes());
                     hll.readRegisters(byteArray.asBuffer());
                     counterMap.put(key.get(), hll);

http://git-wip-us.apache.org/repos/asf/kylin/blob/51a7b012/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 211e947..e550183 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -61,6 +61,7 @@ public class FactDistinctColumnsReducer extends 
KylinReducer<Text, Text, NullWri
     private TblColRef col = null;
     private boolean isStatistics = false;
     private boolean outputTouched = false;
+    private KylinConfig cubeConfig;
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -70,6 +71,7 @@ public class FactDistinctColumnsReducer extends 
KylinReducer<Text, Text, NullWri
         KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
         String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
         CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
+        cubeConfig = cube.getConfig();
         cubeDesc = cube.getDescriptor();
         columnList =  
CubeManager.getInstance(config).getAllDictColumnsOnFact(cubeDesc);
 
@@ -106,7 +108,7 @@ public class FactDistinctColumnsReducer extends 
KylinReducer<Text, Text, NullWri
             // for hll
             long cuboidId = 0 - Bytes.toLong(key.getBytes(), 0, 
Bytes.SIZEOF_LONG);
             for (Text value : values) {
-                HyperLogLogPlusCounter hll = new HyperLogLogPlusCounter(14);
+                HyperLogLogPlusCounter hll = new 
HyperLogLogPlusCounter(cubeConfig.getCubeStatsHLLPrecision());
                 ByteBuffer bf = ByteBuffer.wrap(value.getBytes(), 0, 
value.getLength());
                 hll.readRegisters(bf);
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/51a7b012/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
index 44de69a..f4c9e16 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java
@@ -73,7 +73,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends 
FactDistinctColumnsMap
 
             allCuboidsHLL = new HyperLogLogPlusCounter[cuboidIds.length];
             for (int i = 0; i < cuboidIds.length; i++) {
-                allCuboidsHLL[i] = new HyperLogLogPlusCounter(14);
+                allCuboidsHLL[i] = new 
HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision());
             }
 
             hf = Hashing.murmur3_32();

http://git-wip-us.apache.org/repos/asf/kylin/blob/51a7b012/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
index 8c37fec..fa6f9e2 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsStep.java
@@ -97,7 +97,7 @@ public class MergeStatisticsStep extends AbstractExecutable {
                             // sampling percentage;
                             averageSamplingPercentage += 
Bytes.toInt(value.getBytes());
                         } else if (key.get() > 0) {
-                            HyperLogLogPlusCounter hll = new 
HyperLogLogPlusCounter(14);
+                            HyperLogLogPlusCounter hll = new 
HyperLogLogPlusCounter(kylinConf.getCubeStatsHLLPrecision());
                             ByteArray byteArray = new 
ByteArray(value.getBytes());
                             hll.readRegisters(byteArray.asBuffer());
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/51a7b012/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
----------------------------------------------------------------------
diff --git 
a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java 
b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
index ef35067..70c1032 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkCubing.java
@@ -246,7 +246,7 @@ public class SparkCubing extends AbstractApplication {
         List<Long> allCuboidIds = cuboidScheduler.getAllCuboidIds();
         final HashMap<Long, HyperLogLogPlusCounter> zeroValue = 
Maps.newHashMap();
         for (Long id : allCuboidIds) {
-            zeroValue.put(id, new HyperLogLogPlusCounter(14));
+            zeroValue.put(id, new 
HyperLogLogPlusCounter(cubeDesc.getConfig().getCubeStatsHLLPrecision()));
         }
 
         CubeJoinedFlatTableDesc flatTableDesc = new 
CubeJoinedFlatTableDesc(cubeDesc, null);

Reply via email to