This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 8f38936d58f198d60f7bde8ff233b26b015754f8
Author: sibingzhang <74443791+sibingzh...@users.noreply.github.com>
AuthorDate: Fri Jan 27 16:42:20 2023 +0800

    KYLIN-5496 Fix the query result is incorrect after the character type in the 'yyyy-mm-dd' format is converted to the timestamp type and then filtered
---
 .../kylin/query/routing/RealizationPruner.java     |  2 +-
 .../sql/execution/datasource/FilePruner.scala      | 31 +++++++++++++---------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
index b429cf0407..dc54aae001 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
@@ -268,7 +268,7 @@ public class RealizationPruner {
 
     private static String normalization(String dateFormat, RexLiteral rexLiteral) {
         RelDataTypeFamily typeFamily = rexLiteral.getType().getFamily();
-        if (SqlTypeFamily.DATE == typeFamily) {
+        if (SqlTypeFamily.DATE == typeFamily || SqlTypeFamily.TIMESTAMP == typeFamily) {
             // Calendar uses UTC timezone, just to keep RexLiteral's value(an instanceof DateString)
             long timeInMillis = ((Calendar) rexLiteral.getValue()).getTimeInMillis();
             String dateStr = DateFormat.formatToDateStr(timeInMillis, dateFormat, RealizationPruner.UTC_ZONE);
diff --git a/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala b/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala
index b658a0bd7e..b97d402217 100644
--- a/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala
+++ b/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala
@@ -26,6 +26,7 @@ import org.apache.kylin.common.util.{DateFormat, HadoopUtil}
 import org.apache.kylin.common.{KapConfig, KylinConfig, QueryContext}
 import org.apache.kylin.engine.spark.utils.{LogEx, LogUtils}
 import org.apache.kylin.metadata.cube.model.{DimensionRangeInfo, LayoutEntity, NDataflow, NDataflowManager}
+import org.apache.kylin.metadata.datatype.DataType
 import org.apache.kylin.metadata.model.{PartitionDesc, TblColRef}
 import org.apache.kylin.metadata.project.NProjectManager
 import org.apache.spark.internal.Logging
@@ -731,9 +732,9 @@ abstract class PushableColumnBase {
 case class SegDimFilters(dimRange: java.util.Map[String, DimensionRangeInfo], dimCols: java.util.Map[Integer, TblColRef]) extends Logging {
 
   private def insurance(id: String, value: Any)
-                       (func: String => Filter): Filter = {
+                       (func: Any => Filter): Filter = {
     if (dimRange.containsKey(id) && dimCols.containsKey(id.toInt)) {
-      func(value.toString)
+      func(value)
     } else {
       Trivial(true)
     }
@@ -749,23 +750,29 @@ case class SegDimFilters(dimRange: java.util.Map[String, DimensionRangeInfo], di
   }
 
   def foldFilter(filter: Filter): Filter = {
+
+    def getDataType(col: String, value: Any): DataType = {
+      if (value.isInstanceOf[Date] || value.isInstanceOf[Timestamp]) return DataType.getType("date")
+      dimCols.get(col.toInt).getType
+    }
+
     filter match {
       case EqualTo(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
           ts => {
-            val dataType = dimCols.get(col.toInt).getType
-            Trivial(dataType.compare(ts, dimRange.get(col).getMin) >= 0
-              && dataType.compare(ts, dimRange.get(col).getMax) <= 0)
+            val dataType = getDataType(col, value)
+            Trivial(dataType.compare(ts.toString, dimRange.get(col).getMin) >= 0
+              && dataType.compare(ts.toString, dimRange.get(col).getMax) <= 0)
           }
         }
       case In(id, values: Array[Any]) =>
         val col = escapeQuote(id)
         val satisfied = values.map(v => insurance(col, v) {
           ts => {
-            val dataType = dimCols.get(col.toInt).getType
-            Trivial(dataType.compare(ts, dimRange.get(col).getMin) >= 0
-              && dataType.compare(ts, dimRange.get(col).getMax) <= 0)
+            val dataType = getDataType(col, v)
+            Trivial(dataType.compare(ts.toString, dimRange.get(col).getMin) >= 0
+              && dataType.compare(ts.toString, dimRange.get(col).getMax) <= 0)
           }
         }).exists(_.equals(Trivial(true)))
         Trivial(satisfied)
@@ -777,22 +784,22 @@ case class SegDimFilters(dimRange: java.util.Map[String, DimensionRangeInfo], di
       case GreaterThan(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMax) < 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMax) < 0)
         }
       case GreaterThanOrEqual(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMax) <= 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMax) <= 0)
         }
       case LessThan(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMin) > 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMin) > 0)
         }
       case LessThanOrEqual(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMin) >= 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMin) >= 0)
        }
       case And(left: Filter, right: Filter) =>
         And(foldFilter(left), foldFilter(right)) match {
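
For readers skimming the patch: the pruning bug comes down to comparing a stringified Timestamp
literal against segment dimension ranges kept as 'yyyy-MM-dd' strings. Below is a minimal,
self-contained Scala sketch of that comparison; plain string operations stand in for Kylin's
DataType.compare, and the bounds, the literal, and the take(10) day-truncation are illustrative
assumptions, not Kylin internals.

    import java.sql.Timestamp

    object DimRangePruneSketch {
      def main(args: Array[String]): Unit = {
        // Hypothetical segment whose dimension range covers exactly one day,
        // stored as 'yyyy-MM-dd' strings (as the min/max are in this patch).
        val min = "2023-01-15"
        val max = "2023-01-15"

        // Filter literal produced by e.g. CAST('2023-01-15' AS TIMESTAMP).
        val ts = Timestamp.valueOf("2023-01-15 00:00:00")
        val asString = ts.toString // "2023-01-15 00:00:00.0"

        // Old path: insurance() stringified the value and the varchar column
        // type compared lexicographically. "2023-01-15 00:00:00.0" is greater
        // than "2023-01-15", so the <= max check fails and the only matching
        // segment is pruned, yielding an incorrect empty result.
        println(asString >= min && asString <= max) // false

        // What a date-typed comparison gives: both sides at day precision.
        val day = asString.take(10) // crude stand-in for a real date parse
        println(day >= min && day <= max) // true -> segment is kept
      }
    }

The committed fix takes the second route: getDataType routes Date/Timestamp filter values through
DataType.getType("date"), presumably so both sides are compared as dates rather than lexicographically
as raw strings.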