KYLIN-2167 FactDistinctColumnsReducer may get wrong max/min partition col value


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7a20a06
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7a20a06
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7a20a06

Branch: refs/heads/KYLIN-2006
Commit: e7a20a063a3f007aa2a0ed5f39c616880ba46118
Parents: 4d9a923
Author: shaofengshi <shaofeng...@apache.org>
Authored: Tue Nov 8 14:23:11 2016 +0800
Committer: shaofengshi <shaofeng...@apache.org>
Committed: Tue Nov 8 21:29:22 2016 +0800

----------------------------------------------------------------------
 .../mr/steps/FactDistinctColumnsReducer.java    |  8 ----
 .../mr/steps/UpdateCubeInfoAfterBuildStep.java  | 40 +++++++++-----------
 2 files changed, 18 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a20a06/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 5b00381..b09e614 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -152,14 +152,6 @@ public class FactDistinctColumnsReducer extends 
KylinReducer<Text, Text, NullWri
                     cuboidHLLMap.put(cuboidId, hll);
                 }
             }
-        } else if (isPartitionCol == true) {
-            // for partition col min/max value
-            ByteArray value = new ByteArray(Bytes.copy(key.getBytes(), 1, 
key.getLength() - 1));
-            if (colValues.size() > 1) {
-                colValues.set(1, value);
-            } else {
-                colValues.add(value);
-            }
         } else {
             colValues.add(new ByteArray(Bytes.copy(key.getBytes(), 1, 
key.getLength() - 1)));
             if (colValues.size() == 1000000) { //spill every 1 million

http://git-wip-us.apache.org/repos/asf/kylin/blob/e7a20a06/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
index d285799..4d71f5d 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
@@ -19,6 +19,7 @@
 package org.apache.kylin.engine.mr.steps;
 
 import java.io.IOException;
+import java.text.ParseException;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -82,23 +83,8 @@ public class UpdateCubeInfoAfterBuildStep extends 
AbstractExecutable {
 
     private void updateTimeRange(CubeSegment segment) throws IOException {
         final TblColRef partitionCol = 
segment.getCubeDesc().getModel().getPartitionDesc().getPartitionDateColumnRef();
-        final String factDistinctPath = 
this.getParams().get(BatchConstants.CFG_OUTPUT_PATH);
-        final ReadableTable readableTable = new DFSFileTable(factDistinctPath 
+ "/" + partitionCol.getName(), -1);
-        final ReadableTable.TableReader tableReader = 
readableTable.getReader();
-        String minValue = null, maxValue = null;
-        try {
-            while (tableReader.next()) {
-                if (minValue == null) {
-                    minValue = tableReader.getRow()[0];
-                }
-                maxValue = tableReader.getRow()[0];
-            }
-        } finally {
-            IOUtils.closeQuietly(tableReader);
-        }
-
         final DataType partitionColType = partitionCol.getType();
-        FastDateFormat dateFormat;
+        final FastDateFormat dateFormat;
         if (partitionColType.isDate()) {
             dateFormat = 
DateFormat.getDateFormat(DateFormat.DEFAULT_DATE_PATTERN);
         } else if (partitionColType.isDatetime() || 
partitionColType.isTimestamp()) {
@@ -113,14 +99,24 @@ public class UpdateCubeInfoAfterBuildStep extends 
AbstractExecutable {
             throw new IllegalStateException("Type " + partitionColType + " is 
not valid partition column type");
         }
 
+        final String factDistinctPath = 
this.getParams().get(BatchConstants.CFG_OUTPUT_PATH);
+        final ReadableTable readableTable = new DFSFileTable(factDistinctPath 
+ "/" + partitionCol.getName(), -1);
+        final ReadableTable.TableReader tableReader = 
readableTable.getReader();
+        long minValue = Long.MAX_VALUE, maxValue = Long.MIN_VALUE;
         try {
-            long startTime = dateFormat.parse(minValue).getTime();
-            long endTime = dateFormat.parse(maxValue).getTime();
-            segment.setDateRangeStart(startTime);
-            segment.setDateRangeEnd(endTime);
-        } catch (Exception e) {
-            throw new IllegalStateException(e);
+            while (tableReader.next()) {
+                long time = 
dateFormat.parse(tableReader.getRow()[0]).getTime();
+                minValue = Math.min(minValue, time);
+                maxValue = Math.max(maxValue, time);
+            }
+        } catch (ParseException e) {
+            throw new IOException(e);
+        } finally {
+            IOUtils.closeQuietly(tableReader);
         }
+
+        segment.setDateRangeStart(minValue);
+        segment.setDateRangeEnd(maxValue);
     }
 
 }

Reply via email to