This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch kylin5 in repository https://gitbox.apache.org/repos/asf/kylin.git
commit ab9b3dab31394013ab27c6fa5e8d81ccf9a0b7d2 Author: Pengfei Zhan <dethr...@gmail.com> AuthorDate: Thu Jun 8 17:24:24 2023 +0800 KYLIN-5717 constant project expression eliminate error of to_date/to_timestamp function --- .../org/apache/kylin/common/KylinConfigBase.java | 2 - .../org/apache/kylin/query/udf/SparkTimeUDF.java | 18 ++++++ .../kylin/query/udf/formatUdf/ToDateUDF.java | 55 ----------------- .../kylin/query/udf/formatUdf/ToTimestampUDF.java | 69 ---------------------- .../org/apache/kylin/query/udf/FormatUDFTest.java | 27 --------- .../kylin/query/runtime/ExpressionConverter.scala | 44 ++++---------- 6 files changed, 31 insertions(+), 184 deletions(-) diff --git a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 8e8a82164e..aec5a58ada 100644 --- a/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/src/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -2059,8 +2059,6 @@ public abstract class KylinConfigBase implements Serializable { udfMap.put("unix_timestamp", "org.apache.kylin.query.udf.dateUdf.UnixTimestampUDF"); udfMap.put("length", "org.apache.kylin.query.udf.stringUdf.LengthUDF"); udfMap.put("repeat", "org.apache.kylin.query.udf.stringUdf.RepeatUDF"); - udfMap.put("to_timestamp", "org.apache.kylin.query.udf.formatUdf.ToTimestampUDF"); - udfMap.put("to_date", "org.apache.kylin.query.udf.formatUdf.ToDateUDF"); udfMap.put("to_char", "org.apache.kylin.query.udf.formatUdf.ToCharUDF"); udfMap.put("date_format", "org.apache.kylin.query.udf.formatUdf.DateFormatUDF"); udfMap.put("instr", "org.apache.kylin.query.udf.stringUdf.InStrUDF"); diff --git a/src/query/src/main/java/org/apache/kylin/query/udf/SparkTimeUDF.java b/src/query/src/main/java/org/apache/kylin/query/udf/SparkTimeUDF.java index 3365625313..60f9805931 100644 --- a/src/query/src/main/java/org/apache/kylin/query/udf/SparkTimeUDF.java +++ b/src/query/src/main/java/org/apache/kylin/query/udf/SparkTimeUDF.java @@ -56,6 +56,24 @@ public class SparkTimeUDF implements NotConstant { throw new CalciteNotSupportException(); } + public Date TO_DATE(@Parameter(name = "left") Object left) throws CalciteNotSupportException { + throw new CalciteNotSupportException(); + } + + public Date TO_DATE(@Parameter(name = "left") Object left, @Parameter(name = "format") String fmt) + throws CalciteNotSupportException { + throw new CalciteNotSupportException(); + } + + public Timestamp TO_TIMESTAMP(@Parameter(name = "left") Object left, @Parameter(name = "format") String fmt) + throws CalciteNotSupportException { + throw new CalciteNotSupportException(); + } + + public Timestamp TO_TIMESTAMP(@Parameter(name = "left") Object left) throws CalciteNotSupportException { + throw new CalciteNotSupportException(); + } + public String FROM_UNIXTIME(@Parameter(name = "long1") Object exp1, @Parameter(name = "str2") String exp2) throws CalciteNotSupportException { throw new CalciteNotSupportException(); diff --git a/src/query/src/main/java/org/apache/kylin/query/udf/formatUdf/ToDateUDF.java b/src/query/src/main/java/org/apache/kylin/query/udf/formatUdf/ToDateUDF.java deleted file mode 100644 index dfb0284d1d..0000000000 --- a/src/query/src/main/java/org/apache/kylin/query/udf/formatUdf/ToDateUDF.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.query.udf.formatUdf; - -import java.sql.Date; - -import org.apache.calcite.linq4j.function.Parameter; - -public class ToDateUDF { - - public Date TO_DATE(@Parameter(name = "str1") String dateStr, @Parameter(name = "str2") String fmt) { - if (dateStr == null) { - return null; - } - Date date = null; - switch (fmt) { - case "yyyy-MM-dd": - dateStr = dateStr.substring(0, 10); - date = Date.valueOf(dateStr); - break; - case "yyyy-MM": - dateStr = dateStr.substring(0, 7) + "-01"; - date = Date.valueOf(dateStr); - break; - case "yyyy": - case "y": - dateStr = dateStr.substring(0, 4) + "-01-01"; - date = Date.valueOf(dateStr); - break; - default: - //date = null - } - return date; - } - - public Date TO_DATE(@Parameter(name = "str1") String date) { - return Date.valueOf(date); - } -} diff --git a/src/query/src/main/java/org/apache/kylin/query/udf/formatUdf/ToTimestampUDF.java b/src/query/src/main/java/org/apache/kylin/query/udf/formatUdf/ToTimestampUDF.java deleted file mode 100644 index 73696f3174..0000000000 --- a/src/query/src/main/java/org/apache/kylin/query/udf/formatUdf/ToTimestampUDF.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kylin.query.udf.formatUdf; - -import java.sql.Timestamp; - -import org.apache.calcite.linq4j.function.Parameter; - -/** - * Refer to to_timestamp() on spark SQL. - */ -public class ToTimestampUDF { - - public Timestamp TO_TIMESTAMP(@Parameter(name = "str1") String timestampStr, @Parameter(name = "str2") String fmt) { - if (timestampStr == null) { - return null; - } - Timestamp timestamp = null; - switch (fmt) { - case "yyyy-MM-dd hh:mm:ss": - timestamp = Timestamp.valueOf(timestampStr); - break; - case "yyyy-MM-dd hh:mm": - timestampStr = timestampStr.substring(0, 16) + ":00"; - timestamp = Timestamp.valueOf(timestampStr); - break; - case "yyyy-MM-dd hh": - timestampStr = timestampStr.substring(0, 13) + ":00:00"; - timestamp = Timestamp.valueOf(timestampStr); - break; - case "yyyy-MM-dd": - timestampStr = timestampStr.substring(0, 10) + " 00:00:00"; - timestamp = Timestamp.valueOf(timestampStr); - break; - case "yyyy-MM": - timestampStr = timestampStr.substring(0, 7) + "-01 00:00:00"; - timestamp = Timestamp.valueOf(timestampStr); - break; - case "yyyy": - case "y": - timestampStr = timestampStr.substring(0, 4) + "-01-01 00:00:00"; - timestamp = Timestamp.valueOf(timestampStr); - break; - default: - //timestamp = null; - } - return timestamp; - } - - public Timestamp TO_TIMESTAMP(@Parameter(name = "str1") String timestamp) { - return Timestamp.valueOf(timestamp); - } -} diff --git a/src/query/src/test/java/org/apache/kylin/query/udf/FormatUDFTest.java b/src/query/src/test/java/org/apache/kylin/query/udf/FormatUDFTest.java index d7eae6eaa4..f091715843 100644 --- a/src/query/src/test/java/org/apache/kylin/query/udf/FormatUDFTest.java +++ b/src/query/src/test/java/org/apache/kylin/query/udf/FormatUDFTest.java @@ -25,37 +25,10 @@ import java.sql.Timestamp; import org.apache.kylin.query.udf.formatUdf.DateFormatUDF; import org.apache.kylin.query.udf.formatUdf.ToCharUDF; -import org.apache.kylin.query.udf.formatUdf.ToDateUDF; -import org.apache.kylin.query.udf.formatUdf.ToTimestampUDF; import org.junit.Test; public class FormatUDFTest { - @Test - public void testToTimestampUDF() throws Exception { - ToTimestampUDF toTimestampUDF = new ToTimestampUDF(); - assertEquals(toTimestampUDF.TO_TIMESTAMP("2019-08-05 10:54:23"), Timestamp.valueOf("2019-08-05 10:54:23")); - assertEquals(toTimestampUDF.TO_TIMESTAMP("2019-08-05 10:54:23", "yyyy-MM-dd hh:mm"), - Timestamp.valueOf("2019-08-05 10:54:00")); - assertEquals(toTimestampUDF.TO_TIMESTAMP("2019-08-05 10:54:23", "yyyy-MM-dd hh"), - Timestamp.valueOf("2019-08-05 10:00:00")); - assertEquals(toTimestampUDF.TO_TIMESTAMP("2019-08-05 10:54:23", "yyyy-MM-dd"), - Timestamp.valueOf("2019-08-05 00:00:00")); - assertEquals(toTimestampUDF.TO_TIMESTAMP("2019-08-05 10:54:23", "yyyy-MM"), - Timestamp.valueOf("2019-08-01 00:00:00")); - assertEquals(toTimestampUDF.TO_TIMESTAMP("2019-08-05 10:54:23", "yyyy"), - Timestamp.valueOf("2019-01-01 00:00:00")); - } - - @Test - public void testToDateUDF() throws Exception { - ToDateUDF toDateUDF = new ToDateUDF(); - assertEquals(toDateUDF.TO_DATE("2019-08-05"), Date.valueOf("2019-08-05")); - assertEquals(toDateUDF.TO_DATE("2019-08-05", "yyyy-MM-dd"), Date.valueOf("2019-08-05")); - assertEquals(toDateUDF.TO_DATE("2019-08-05", "yyyy-MM"), Date.valueOf("2019-08-01")); - assertEquals(toDateUDF.TO_DATE("2019-08-05", "yyyy"), Date.valueOf("2019-01-01")); - } - @Test public void testToCharUDF() throws Exception { ToCharUDF toCharUDF = new ToCharUDF(); diff --git a/src/spark-project/sparder/src/main/scala/org/apache/kylin/query/runtime/ExpressionConverter.scala b/src/spark-project/sparder/src/main/scala/org/apache/kylin/query/runtime/ExpressionConverter.scala index 86423ae47d..1e3d392e75 100644 --- a/src/spark-project/sparder/src/main/scala/org/apache/kylin/query/runtime/ExpressionConverter.scala +++ b/src/spark-project/sparder/src/main/scala/org/apache/kylin/query/runtime/ExpressionConverter.scala @@ -35,7 +35,7 @@ import scala.collection.mutable object ExpressionConverter { - val unaryParameterFunc = mutable.HashSet("ucase", "lcase", "base64", + private val unaryParameterFunc = mutable.HashSet("ucase", "lcase", "base64", "sentences", "unbase64", "crc32", "md5", "sha", "sha1", // time "weekofyear", @@ -45,21 +45,21 @@ object ExpressionConverter { "explode" ) - val ternaryParameterFunc = mutable.HashSet("replace", "substring_index", "lpad", "rpad", "conv", "regexp_extract") - val binaryParameterFunc = + private val ternaryParameterFunc = mutable.HashSet("replace", "substring_index", "lpad", "rpad", "conv", "regexp_extract") + private val binaryParameterFunc = mutable.HashSet("decode", "encode", "find_in_set", "levenshtein", "sha2", "trunc", "add_months", "date_add", "date_sub", "from_utc_timestamp", "to_utc_timestamp", // math function "bround", "hypot", "log" ) - val noneParameterfunc = mutable.HashSet("current_database", "input_file_block_length", "input_file_block_start", + private val noParameterFunc = mutable.HashSet("current_database", "input_file_block_length", "input_file_block_start", "input_file_name", "monotonically_increasing_id", "now", "spark_partition_id", "uuid" ) - val varArgsFunc = mutable.HashSet("months_between", "locate", "rtrim", "from_unixtime") + private val varArgsFunc = mutable.HashSet("months_between", "locate", "rtrim", "from_unixtime", "to_date", "to_timestamp") - val bitmapUDF = mutable.HashSet("intersect_count_by_col", "subtract_bitmap_value", "subtract_bitmap_uuid"); + private val bitmapUDF = mutable.HashSet("intersect_count_by_col", "subtract_bitmap_value", "subtract_bitmap_uuid"); // scalastyle:off def convert(sqlTypeName: SqlTypeName, relDataType: RelDataType, op: SqlKind, opName: String, children: Seq[Any]): Any = { @@ -295,15 +295,6 @@ object ExpressionConverter { current_date() case "current_timestamp" => current_timestamp() - case "to_timestamp" => - if (children.length == 1) { - to_timestamp(k_lit(children.head)) - } else if (children.length == 2) { - to_timestamp(k_lit(children.head), k_lit(children.apply(1)).toString()) - } else { - throw new UnsupportedOperationException( - s"to_timestamp must provide one or two parameters under sparder") - } case "unix_timestamp" => if (children.isEmpty) { unix_timestamp @@ -315,15 +306,6 @@ object ExpressionConverter { throw new UnsupportedOperationException( s"unix_timestamp only supports two or fewer parameters") } - case "to_date" => - if (children.length == 1) { - to_date(k_lit(children.head)) - } else if (children.length == 2) { - to_date(k_lit(children.head), k_lit(children.apply(1)).toString()) - } else { - throw new UnsupportedOperationException( - s"to_date must provide one or two parameters under sparder") - } case "to_char" | "date_format" => var part = k_lit(children.apply(1)).toString().toUpperCase match { case "YEAR" => @@ -375,16 +357,16 @@ object ExpressionConverter { tan(k_lit(children.head)) case "sin" => sin(k_lit(children.head)) - case func if (noneParameterfunc.contains(func)) => - callUDF(func) + case func if (noParameterFunc.contains(func)) => + call_udf(func) case func if (unaryParameterFunc.contains(func)) => - callUDF(func, k_lit(children.head)) + call_udf(func, k_lit(children.head)) case func if (binaryParameterFunc.contains(func)) => - callUDF(func, k_lit(children.head), k_lit(children.apply(1))) + call_udf(func, k_lit(children.head), k_lit(children.apply(1))) case func if (ternaryParameterFunc.contains(func)) => - callUDF(func, k_lit(children.head), k_lit(children.apply(1)), k_lit(children.apply(2))) + call_udf(func, k_lit(children.head), k_lit(children.apply(1)), k_lit(children.apply(2))) case func if (varArgsFunc.contains(func)) => { - callUDF(func, children.map(k_lit(_)): _*) + call_udf(func, children.map(k_lit(_)): _*) } case "date_part" => var part = k_lit(children.head).toString().toUpperCase match { @@ -454,7 +436,7 @@ object ExpressionConverter { if (children.length == 1) { ceil(k_lit(children.head)) } else if (children.length == 2) { - callUDF("ceil_datetime", children.map(k_lit): _*) + call_udf("ceil_datetime", children.map(k_lit): _*) } else { throw new UnsupportedOperationException( s"ceil must provide one or two parameters under sparder")