APACHE-KYLIN-2866: move hll shard base config to BatchConstants

Signed-off-by: lidongsjtu <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fba9d1d3
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fba9d1d3
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fba9d1d3

Branch: refs/heads/master
Commit: fba9d1d3dbe7d4dffbf8e0b87bf58b70baa7ffa6
Parents: 5425deb
Author: Zhong <[email protected]>
Authored: Thu Sep 28 17:49:42 2017 +0800
Committer: lidongsjtu <[email protected]>
Committed: Wed Dec 20 23:20:11 2017 +0800

----------------------------------------------------------------------
 .../apache/kylin/engine/mr/common/BatchConstants.java |  2 ++
 .../apache/kylin/engine/mr/common/MapReduceUtil.java  | 14 ++++++++++++++
 .../mr/steps/FactDistinctColumnPartitioner.java       | 11 ++++-------
 .../kylin/engine/mr/steps/FactDistinctColumnsJob.java | 14 +++-----------
 .../engine/mr/steps/FactDistinctColumnsReducer.java   |  8 ++++----
 5 files changed, 27 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
index 129c6dd..50c589a 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java
@@ -70,6 +70,8 @@ public interface BatchConstants {
     String CFG_SPARK_META_URL = "spark.meta.url";
     String CFG_GLOBAL_DICT_BASE_DIR = "global.dict.base.dir";
 
+    String CFG_HLL_SHARD_BASE = "mapreduce.partition.hll.shard.base";
+
     /**
      * command line ARGuments
      */

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
index 0379f64..b249f12 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java
@@ -35,6 +35,20 @@ public class MapReduceUtil {
     private static final Logger logger = LoggerFactory.getLogger(MapReduceUtil.class);
 
     /**
+     * @return reducer number for calculating hll
+     */
+    public static int getHLLShardBase(CubeSegment segment) {
+        int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size();
+        int shardBase = (nCuboids - 1) / segment.getConfig().getHadoopJobPerReducerHLLCuboidNumber() + 1;
+
+        int hllMaxReducerNumber = segment.getConfig().getHadoopJobHLLMaxReducerNumber();
+        if (shardBase > hllMaxReducerNumber) {
+            shardBase = hllMaxReducerNumber;
+        }
+        return shardBase;
+    }
+
+    /**
      * @param cuboidScheduler specified can provide more flexibility
      * */
     public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler,

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
index 7ac5d02..141ca99 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Partitioner;
 import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.BytesUtil;
+import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -32,12 +33,6 @@ import org.slf4j.LoggerFactory;
 public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortableKey, Text> implements Configurable {
     private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnPartitioner.class);
 
-    public static final String HLL_SHARD_BASE_PROPERTY_NAME = "mapreduce.partition.factdistinctcolumnpartitioner.hll.shard.base";
-
-    public static void setHLLShard(Configuration conf, int hllShardBase) {
-        conf.setInt(HLL_SHARD_BASE_PROPERTY_NAME, hllShardBase);
-    }
-
     private Configuration conf;
     private int hllShardBase = 1;
 
@@ -60,12 +55,14 @@ public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortabl
         }
     }
 
+    @Override
     public void setConf(Configuration conf) {
         this.conf = conf;
-        hllShardBase = conf.getInt(HLL_SHARD_BASE_PROPERTY_NAME, 1);
+        hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 1);
         logger.info("shard base for hll is " + hllShardBase);
     }
 
+    @Override
     public Configuration getConf() {
         return conf;
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
index dee384f..5200950 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java
@@ -42,6 +42,7 @@ import org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat;
 import org.apache.kylin.engine.mr.MRUtil;
 import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
 import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.common.MapReduceUtil;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -136,15 +137,6 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
 
     }
 
-    private int getHLLShardBase(CubeSegment segment) {
-        int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size();
-        int shardBase = (nCuboids - 1) / segment.getConfig().getFactDistinctJobPerReducerHLLCuboidNumber() + 1;
-        if (shardBase > segment.getConfig().getFactDistinctJobHLLMaxReducerNumber()) {
-            shardBase = segment.getConfig().getFactDistinctJobHLLMaxReducerNumber();
-        }
-        return shardBase;
-    }
-
     private void setupMapper(CubeSegment cubeSeg) throws IOException {
         IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
         flatTableInputFormat.configureJob(job);
@@ -159,8 +151,8 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob {
             throws IOException {
         int numberOfReducers = reducerCount;
         if ("true".equalsIgnoreCase(statistics_enabled)) {
-            int hllShardBase = getHLLShardBase(cubeSeg);
-            FactDistinctColumnPartitioner.setHLLShard(job.getConfiguration(), hllShardBase);
+            int hllShardBase = MapReduceUtil.getHLLShardBase(cubeSeg);
+            job.getConfiguration().setInt(BatchConstants.CFG_HLL_SHARD_BASE, hllShardBase);
             numberOfReducers += (1 + hllShardBase);
         }
         job.setReducerClass(FactDistinctColumnsReducer.class);

http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index a733430..37972c0 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -109,11 +109,11 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
 
         boolean ifCol = true;
         if (collectStatistics) {
-            int hllShardBase = conf.getInt(FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME, 0);
+            int hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 0);
             if (hllShardBase <= 0) {
-                throw new IllegalArgumentException("In job configuration the value for property "
-                        + FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME + " is " + hllShardBase
-                        + ". It should be set correctly!!!");
+                throw new IllegalArgumentException(
+                        "In job configuration the value for property " + BatchConstants.CFG_HLL_SHARD_BASE
+                        + " is " + hllShardBase + ". It should be set correctly!!!");
             }
             ifCol = false;
             if (taskId >= numberOfTasks - hllShardBase) {

Reply via email to