This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 8f38936d58f198d60f7bde8ff233b26b015754f8
Author: sibingzhang <74443791+sibingzh...@users.noreply.github.com>
AuthorDate: Fri Jan 27 16:42:20 2023 +0800

    KYLIN-5496 Fix the query result is incorrect after the character type in the 'yyyy-mm-dd' format is converted to the timestamp type and then filtered
---
 .../kylin/query/routing/RealizationPruner.java     |  2 +-
 .../sql/execution/datasource/FilePruner.scala      | 31 +++++++++++++---------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
index b429cf0407..dc54aae001 100644
--- a/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
+++ b/src/query-common/src/main/java/org/apache/kylin/query/routing/RealizationPruner.java
@@ -268,7 +268,7 @@ public class RealizationPruner {
 
     private static String normalization(String dateFormat, RexLiteral rexLiteral) {
         RelDataTypeFamily typeFamily = rexLiteral.getType().getFamily();
-        if (SqlTypeFamily.DATE == typeFamily) {
+        if (SqlTypeFamily.DATE == typeFamily || SqlTypeFamily.TIMESTAMP == typeFamily) {
             // Calendar uses UTC timezone, just to keep RexLiteral's value(an instanceof DateString)
             long timeInMillis = ((Calendar) rexLiteral.getValue()).getTimeInMillis();
             String dateStr = DateFormat.formatToDateStr(timeInMillis, dateFormat, RealizationPruner.UTC_ZONE);
diff --git a/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala b/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala
index b658a0bd7e..b97d402217 100644
--- a/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala
+++ b/src/spark-project/spark-common/src/main/scala/org/apache/spark/sql/execution/datasource/FilePruner.scala
@@ -26,6 +26,7 @@ import org.apache.kylin.common.util.{DateFormat, HadoopUtil}
 import org.apache.kylin.common.{KapConfig, KylinConfig, QueryContext}
 import org.apache.kylin.engine.spark.utils.{LogEx, LogUtils}
 import org.apache.kylin.metadata.cube.model.{DimensionRangeInfo, LayoutEntity, NDataflow, NDataflowManager}
+import org.apache.kylin.metadata.datatype.DataType
 import org.apache.kylin.metadata.model.{PartitionDesc, TblColRef}
 import org.apache.kylin.metadata.project.NProjectManager
 import org.apache.spark.internal.Logging
@@ -731,9 +732,9 @@ abstract class PushableColumnBase {
 case class SegDimFilters(dimRange: java.util.Map[String, DimensionRangeInfo], dimCols: java.util.Map[Integer, TblColRef]) extends Logging {
 
   private def insurance(id: String, value: Any)
-                       (func: String => Filter): Filter = {
+                       (func: Any => Filter): Filter = {
     if (dimRange.containsKey(id) && dimCols.containsKey(id.toInt)) {
-      func(value.toString)
+      func(value)
     } else {
       Trivial(true)
     }
@@ -749,23 +750,29 @@ case class SegDimFilters(dimRange: java.util.Map[String, DimensionRangeInfo], di
   }
 
   def foldFilter(filter: Filter): Filter = {
+
+    def getDataType(col: String, value: Any): DataType = {
+      if (value.isInstanceOf[Date] || value.isInstanceOf[Timestamp]) return DataType.getType("date")
+      dimCols.get(col.toInt).getType
+    }
+
     filter match {
       case EqualTo(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
           ts => {
-            val dataType = dimCols.get(col.toInt).getType
-            Trivial(dataType.compare(ts, dimRange.get(col).getMin) >= 0
-              && dataType.compare(ts, dimRange.get(col).getMax) <= 0)
+            val dataType = getDataType(col, value)
+            Trivial(dataType.compare(ts.toString, dimRange.get(col).getMin) >= 0
+              && dataType.compare(ts.toString, dimRange.get(col).getMax) <= 0)
           }
         }
       case In(id, values: Array[Any]) =>
         val col = escapeQuote(id)
         val satisfied = values.map(v => insurance(col, v) {
           ts => {
-            val dataType = dimCols.get(col.toInt).getType
-            Trivial(dataType.compare(ts, dimRange.get(col).getMin) >= 0
-              && dataType.compare(ts, dimRange.get(col).getMax) <= 0)
+            val dataType = getDataType(col, v)
+            Trivial(dataType.compare(ts.toString, dimRange.get(col).getMin) >= 0
+              && dataType.compare(ts.toString, dimRange.get(col).getMax) <= 0)
           }
         }).exists(_.equals(Trivial(true)))
         Trivial(satisfied)
@@ -777,22 +784,22 @@ case class SegDimFilters(dimRange: java.util.Map[String, DimensionRangeInfo], di
       case GreaterThan(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMax) < 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMax) < 0)
         }
       case GreaterThanOrEqual(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMax) <= 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMax) <= 0)
         }
       case LessThan(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMin) > 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMin) > 0)
         }
       case LessThanOrEqual(id, value: Any) =>
         val col = escapeQuote(id)
         insurance(col, value) {
-          ts => Trivial(dimCols.get(col.toInt).getType.compare(ts, dimRange.get(col).getMin) >= 0)
+          ts => Trivial(getDataType(col, value).compare(ts.toString, dimRange.get(col).getMin) >= 0)
        }
       case And(left: Filter, right: Filter) =>
         And(foldFilter(left), foldFilter(right)) match {
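
For readers skimming the patch: the pruning bug comes down to comparing a stringified Timestamp
literal against segment dimension ranges kept as 'yyyy-MM-dd' strings. Below is a minimal,
self-contained Scala sketch of that comparison; plain string operations stand in for Kylin's
DataType.compare, and the bounds, the literal, and the take(10) day-truncation are illustrative
assumptions, not Kylin internals.

    import java.sql.Timestamp

    object DimRangePruneSketch {
      def main(args: Array[String]): Unit = {
        // Hypothetical segment whose dimension range covers exactly one day,
        // stored as 'yyyy-MM-dd' strings (as the min/max are in this patch).
        val min = "2023-01-15"
        val max = "2023-01-15"

        // Filter literal produced by e.g. CAST('2023-01-15' AS TIMESTAMP).
        val ts = Timestamp.valueOf("2023-01-15 00:00:00")
        val asString = ts.toString // "2023-01-15 00:00:00.0"

        // Old path: insurance() stringified the value and the varchar column
        // type compared lexicographically. "2023-01-15 00:00:00.0" is greater
        // than "2023-01-15", so the <= max check fails and the only matching
        // segment is pruned, yielding an incorrect empty result.
        println(asString >= min && asString <= max) // false

        // What a date-typed comparison gives: both sides at day precision.
        val day = asString.take(10) // crude stand-in for a real date parse
        println(day >= min && day <= max) // true -> segment is kept
      }
    }

The committed fix takes the second route: getDataType routes Date/Timestamp filter values through
DataType.getType("date"), presumably so both sides are compared as dates rather than lexicographically
as raw strings.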