APACHE-KYLIN-2866: move hll shard base config to BatchConstants

Signed-off-by: lidongsjtu <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/fba9d1d3 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/fba9d1d3 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/fba9d1d3 Branch: refs/heads/master Commit: fba9d1d3dbe7d4dffbf8e0b87bf58b70baa7ffa6 Parents: 5425deb Author: Zhong <[email protected]> Authored: Thu Sep 28 17:49:42 2017 +0800 Committer: lidongsjtu <[email protected]> Committed: Wed Dec 20 23:20:11 2017 +0800 ---------------------------------------------------------------------- .../apache/kylin/engine/mr/common/BatchConstants.java | 2 ++ .../apache/kylin/engine/mr/common/MapReduceUtil.java | 14 ++++++++++++++ .../mr/steps/FactDistinctColumnPartitioner.java | 11 ++++------- .../kylin/engine/mr/steps/FactDistinctColumnsJob.java | 14 +++----------- .../engine/mr/steps/FactDistinctColumnsReducer.java | 8 ++++---- 5 files changed, 27 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java index 129c6dd..50c589a 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/BatchConstants.java @@ -70,6 +70,8 @@ public interface BatchConstants { String CFG_SPARK_META_URL = "spark.meta.url"; String CFG_GLOBAL_DICT_BASE_DIR = "global.dict.base.dir"; + String CFG_HLL_SHARD_BASE = "mapreduce.partition.hll.shard.base"; + /** * command line ARGuments */ 
http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java index 0379f64..b249f12 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/MapReduceUtil.java @@ -35,6 +35,20 @@ public class MapReduceUtil { private static final Logger logger = LoggerFactory.getLogger(MapReduceUtil.class); /** + * @return reducer number for calculating hll + */ + public static int getHLLShardBase(CubeSegment segment) { + int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size(); + int shardBase = (nCuboids - 1) / segment.getConfig().getHadoopJobPerReducerHLLCuboidNumber() + 1; + + int hllMaxReducerNumber = segment.getConfig().getHadoopJobHLLMaxReducerNumber(); + if (shardBase > hllMaxReducerNumber) { + shardBase = hllMaxReducerNumber; + } + return shardBase; + } + + /** * @param cuboidScheduler specified can provide more flexibility * */ public static int getLayeredCubingReduceTaskNum(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler, http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java index 7ac5d02..141ca99 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java +++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnPartitioner.java @@ -24,6 +24,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.BytesUtil; +import org.apache.kylin.engine.mr.common.BatchConstants; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,12 +33,6 @@ import org.slf4j.LoggerFactory; public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortableKey, Text> implements Configurable { private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnPartitioner.class); - public static final String HLL_SHARD_BASE_PROPERTY_NAME = "mapreduce.partition.factdistinctcolumnpartitioner.hll.shard.base"; - - public static void setHLLShard(Configuration conf, int hllShardBase) { - conf.setInt(HLL_SHARD_BASE_PROPERTY_NAME, hllShardBase); - } - private Configuration conf; private int hllShardBase = 1; @@ -60,12 +55,14 @@ public class FactDistinctColumnPartitioner extends Partitioner<SelfDefineSortabl } } + @Override public void setConf(Configuration conf) { this.conf = conf; - hllShardBase = conf.getInt(HLL_SHARD_BASE_PROPERTY_NAME, 1); + hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 1); logger.info("shard base for hll is " + hllShardBase); } + @Override public Configuration getConf() { return conf; } http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java index dee384f..5200950 100755 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java +++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsJob.java @@ -42,6 +42,7 @@ import org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat; import org.apache.kylin.engine.mr.MRUtil; import org.apache.kylin.engine.mr.common.AbstractHadoopJob; import org.apache.kylin.engine.mr.common.BatchConstants; +import org.apache.kylin.engine.mr.common.MapReduceUtil; import org.apache.kylin.metadata.model.TblColRef; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -136,15 +137,6 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob { } - private int getHLLShardBase(CubeSegment segment) { - int nCuboids = segment.getCuboidScheduler().getAllCuboidIds().size(); - int shardBase = (nCuboids - 1) / segment.getConfig().getFactDistinctJobPerReducerHLLCuboidNumber() + 1; - if (shardBase > segment.getConfig().getFactDistinctJobHLLMaxReducerNumber()) { - shardBase = segment.getConfig().getFactDistinctJobHLLMaxReducerNumber(); - } - return shardBase; - } - private void setupMapper(CubeSegment cubeSeg) throws IOException { IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat(); flatTableInputFormat.configureJob(job); @@ -159,8 +151,8 @@ public class FactDistinctColumnsJob extends AbstractHadoopJob { throws IOException { int numberOfReducers = reducerCount; if ("true".equalsIgnoreCase(statistics_enabled)) { - int hllShardBase = getHLLShardBase(cubeSeg); - FactDistinctColumnPartitioner.setHLLShard(job.getConfiguration(), hllShardBase); + int hllShardBase = MapReduceUtil.getHLLShardBase(cubeSeg); + job.getConfiguration().setInt(BatchConstants.CFG_HLL_SHARD_BASE, hllShardBase); numberOfReducers += (1 + hllShardBase); } job.setReducerClass(FactDistinctColumnsReducer.class); http://git-wip-us.apache.org/repos/asf/kylin/blob/fba9d1d3/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java ---------------------------------------------------------------------- diff 
--git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java index a733430..37972c0 100755 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java @@ -109,11 +109,11 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK boolean ifCol = true; if (collectStatistics) { - int hllShardBase = conf.getInt(FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME, 0); + int hllShardBase = conf.getInt(BatchConstants.CFG_HLL_SHARD_BASE, 0); if (hllShardBase <= 0) { - throw new IllegalArgumentException("In job configuration the value for property " - + FactDistinctColumnPartitioner.HLL_SHARD_BASE_PROPERTY_NAME + " is " + hllShardBase - + ". It should be set correctly!!!"); + throw new IllegalArgumentException( + "In job configuration the value for property " + BatchConstants.CFG_HLL_SHARD_BASE + + " is " + hllShardBase + ". It should be set correctly!!!"); } ifCol = false; if (taskId >= numberOfTasks - hllShardBase) {
