KYLIN-2167 FactDistinctColumnsReducer may get wrong max/min partition col value
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7a20a06 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7a20a06 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7a20a06 Branch: refs/heads/KYLIN-2006 Commit: e7a20a063a3f007aa2a0ed5f39c616880ba46118 Parents: 4d9a923 Author: shaofengshi <shaofeng...@apache.org> Authored: Tue Nov 8 14:23:11 2016 +0800 Committer: shaofengshi <shaofeng...@apache.org> Committed: Tue Nov 8 21:29:22 2016 +0800 ---------------------------------------------------------------------- .../mr/steps/FactDistinctColumnsReducer.java | 8 ---- .../mr/steps/UpdateCubeInfoAfterBuildStep.java | 40 +++++++++----------- 2 files changed, 18 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a20a06/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java index 5b00381..b09e614 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java @@ -152,14 +152,6 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri cuboidHLLMap.put(cuboidId, hll); } } - } else if (isPartitionCol == true) { - // for partition col min/max value - ByteArray value = new ByteArray(Bytes.copy(key.getBytes(), 1, key.getLength() - 1)); - if (colValues.size() > 1) { - colValues.set(1, value); - } else { - colValues.add(value); - } } else { colValues.add(new ByteArray(Bytes.copy(key.getBytes(), 1, key.getLength() - 1))); if (colValues.size() == 1000000) { //spill every 1 million http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a20a06/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java index d285799..4d71f5d 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java @@ -19,6 +19,7 @@ package org.apache.kylin.engine.mr.steps; import java.io.IOException; +import java.text.ParseException; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; @@ -82,23 +83,8 @@ public class UpdateCubeInfoAfterBuildStep extends AbstractExecutable { private void updateTimeRange(CubeSegment segment) throws IOException { final TblColRef partitionCol = segment.getCubeDesc().getModel().getPartitionDesc().getPartitionDateColumnRef(); - final String factDistinctPath = this.getParams().get(BatchConstants.CFG_OUTPUT_PATH); - final ReadableTable readableTable = new DFSFileTable(factDistinctPath + "/" + partitionCol.getName(), -1); - final ReadableTable.TableReader tableReader = readableTable.getReader(); - String minValue = null, maxValue = null; - try { - while (tableReader.next()) { - if (minValue == null) { - minValue = tableReader.getRow()[0]; - } - maxValue = tableReader.getRow()[0]; - } - } finally { - IOUtils.closeQuietly(tableReader); - } - final DataType partitionColType = partitionCol.getType(); - FastDateFormat dateFormat; + final FastDateFormat dateFormat; if (partitionColType.isDate()) { dateFormat = DateFormat.getDateFormat(DateFormat.DEFAULT_DATE_PATTERN); } else if (partitionColType.isDatetime() || partitionColType.isTimestamp()) { @@ -113,14 +99,24 @@ public class UpdateCubeInfoAfterBuildStep extends AbstractExecutable { throw new IllegalStateException("Type " + partitionColType + " is not valid partition column type"); } + final String factDistinctPath = this.getParams().get(BatchConstants.CFG_OUTPUT_PATH); + final ReadableTable readableTable = new DFSFileTable(factDistinctPath + "/" + partitionCol.getName(), -1); + final ReadableTable.TableReader tableReader = readableTable.getReader(); + long minValue = Long.MAX_VALUE, maxValue = Long.MIN_VALUE; try { - long startTime = dateFormat.parse(minValue).getTime(); - long endTime = dateFormat.parse(maxValue).getTime(); - segment.setDateRangeStart(startTime); - segment.setDateRangeEnd(endTime); - } catch (Exception e) { - throw new IllegalStateException(e); + while (tableReader.next()) { + long time = dateFormat.parse(tableReader.getRow()[0]).getTime(); + minValue = Math.min(minValue, time); + maxValue = Math.max(maxValue, time); + } + } catch (ParseException e) { + throw new IOException(e); + } finally { + IOUtils.closeQuietly(tableReader); } + + segment.setDateRangeStart(minValue); + segment.setDateRangeEnd(maxValue); } }