KYLIN-3209: Make optimize job partial statistics path consistent with batch building ones
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/0f63fa61
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/0f63fa61
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/0f63fa61

Branch: refs/heads/sync
Commit: 0f63fa617ffc9604e61ba024f8dcf8acb30a54a2
Parents: 5eedd13
Author: Ma Gang <mg4w...@163.com>
Authored: Tue Jan 30 11:17:33 2018 +0800
Committer: Zhong <nju_y...@apache.org>
Committed: Tue Jan 30 11:24:41 2018 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java  | 4 +++-
 .../apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kylin/blob/0f63fa61/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
index b1e59a7..f50a4be 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsWriter.java
@@ -50,10 +50,12 @@ public class CubeStatsWriter {
                 mapperOverlapRatio);
     }
 
+    //Be careful that the file name for partial cuboid statistics should start with BatchConstants.CFG_OUTPUT_STATISTICS,
+    //because during later statistics merging, only files starting with BatchConstants.CFG_OUTPUT_STATISTICS will be used
     public static void writePartialCuboidStatistics(Configuration conf, Path outputPath, //
             Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio,
             int shard) throws IOException {
-        Path seqFilePath = new Path(outputPath, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME + "_" + shard);
+        Path seqFilePath = new Path(outputPath, BatchConstants.CFG_OUTPUT_STATISTICS + "_" + shard);
         writeCuboidStatisticsInner(conf, seqFilePath, cuboidHLLMap, samplingPercentage, mapperNumber,
                 mapperOverlapRatio);
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/0f63fa61/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
index 3f12b0d..434892c 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/MergeStatisticsWithOldStep.java
@@ -86,7 +86,7 @@ public class MergeStatisticsWithOldStep extends AbstractExecutable {
             }
 
             Path[] statisticsFiles = HadoopUtil.getFilteredPath(hdfs, statisticsDirPath,
-                    BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
+                    BatchConstants.CFG_OUTPUT_STATISTICS);
             if (statisticsFiles == null) {
                 throw new IOException("fail to find the statistics file in base dir: " + statisticsDirPath);
             }