This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 03cb4d9d6874 [SPARK-52883][SPARK-52884][SQL] Implement the to_time and
try_to_time functions in Scala
03cb4d9d6874 is described below
commit 03cb4d9d68746a714efcd335540c420da97bb9ed
Author: Uros Bojanic <[email protected]>
AuthorDate: Wed Jul 23 16:17:27 2025 +0200
[SPARK-52883][SPARK-52884][SQL] Implement the to_time and try_to_time
functions in Scala
### What changes were proposed in this pull request?
Implement the `to_time` and `try_to_time` functions in Scala API.
### Why are the changes needed?
Expand API support for the `ToTime` and `TryToTime` expressions.
### Does this PR introduce _any_ user-facing change?
Yes, the new functions are now available in Scala API.
### How was this patch tested?
Added appropriate Scala functions tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #51575 from uros-db/scala-try_to_time.
Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
python/pyspark/sql/tests/test_functions.py | 4 +-
.../scala/org/apache/spark/sql/functions.scala | 70 +++++++++
.../apache/spark/sql/TimeFunctionsSuiteBase.scala | 174 ++++++++++++++++++++-
3 files changed, 246 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/sql/tests/test_functions.py
b/python/pyspark/sql/tests/test_functions.py
index 183a17728421..3a4dfd8f6e4d 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -83,7 +83,9 @@ class FunctionsTestsMixin:
# Functions that we expect to be missing in python until they are
added to pyspark
expected_missing_in_py = set(
# TODO(SPARK-52888): Implement the make_time function in Python
- ["make_time"]
+ # TODO(SPARK-52890): Implement the to_time function in Python
+ # TODO(SPARK-52891): Implement the try_to_time function in Python
+ ["make_time", "to_time", "try_to_time"]
)
self.assertEqual(
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index 2b41d0333534..e16554eaddbf 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -5699,6 +5699,41 @@ object functions {
def unix_timestamp(s: Column, p: String): Column =
Column.fn("unix_timestamp", s, lit(p))
+ /**
+ * Parses a string value to a time value.
+ *
+ * @param str
+ * A string to be parsed to time.
+ * @return
+ * A time, or raises an error if the input is malformed.
+ *
+ * @group datetime_funcs
+ * @since 4.1.0
+ */
+ def to_time(str: Column): Column = {
+ Column.fn("to_time", str)
+ }
+
+ /**
+ * Parses a string value to a time value.
+ *
+ * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
+ * Patterns</a> for valid time format patterns.
+ *
+ * @param str
+ * A string to be parsed to time.
+ * @param format
+ * A time format pattern to follow.
+ * @return
+ * A time, or raises an error if the input is malformed.
+ *
+ * @group datetime_funcs
+ * @since 4.1.0
+ */
+ def to_time(str: Column, format: Column): Column = {
+ Column.fn("to_time", str, format)
+ }
+
/**
* Converts to a timestamp by casting rules to `TimestampType`.
*
@@ -5731,6 +5766,41 @@ object functions {
*/
def to_timestamp(s: Column, fmt: String): Column = Column.fn("to_timestamp",
s, lit(fmt))
+ /**
+ * Parses a string value to a time value.
+ *
+ * @param str
+ * A string to be parsed to time.
+ * @return
+ * A time, or null if the input is malformed.
+ *
+ * @group datetime_funcs
+ * @since 4.1.0
+ */
+ def try_to_time(str: Column): Column = {
+ Column.fn("try_to_time", str)
+ }
+
+ /**
+ * Parses a string value to a time value.
+ *
+ * See <a
href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">
Datetime
+ * Patterns</a> for valid time format patterns.
+ *
+ * @param str
+ * A string to be parsed to time.
+ * @param format
+ * A time format pattern to follow.
+ * @return
+ * A time, or null if the input is malformed.
+ *
+ * @group datetime_funcs
+ * @since 4.1.0
+ */
+ def try_to_time(str: Column, format: Column): Column = {
+ Column.fn("try_to_time", str, format)
+ }
+
/**
* Parses the `s` with the `format` to a timestamp. The function always
returns null on an
* invalid input with`/`without ANSI SQL mode enabled. The result data type
is consistent with
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
b/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
index 4702b9d43498..7d7c4597ddfe 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
import java.time.LocalTime
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkDateTimeException}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
@@ -161,6 +161,178 @@ abstract class TimeFunctionsSuiteBase extends QueryTest
with SharedSparkSession
checkAnswer(result1, expected)
checkAnswer(result2, expected)
}
+
+ test("SPARK-52883: to_time function without format") {
+ // Input data for the function.
+ val schema = StructType(Seq(
+ StructField("str", StringType, nullable = false)
+ ))
+ val data = Seq(
+ Row("00:00:00"),
+ Row("01:02:03.4"),
+ Row("23:59:59.999999")
+ )
+ val df = spark.createDataFrame(spark.sparkContext.parallelize(data),
schema)
+
+ // Test the function using both `selectExpr` and `select`.
+ val result1 = df.selectExpr(
+ "to_time(str)"
+ )
+ val result2 = df.select(
+ to_time(col("str"))
+ )
+ // Check that both methods produce the same result.
+ checkAnswer(result1, result2)
+
+ // Expected output of the function.
+ val expected = Seq(
+ "00:00:00",
+ "01:02:03.4",
+ "23:59:59.999999"
+ ).toDF("timeString").select(col("timeString").cast("time"))
+ // Check that the results match the expected output.
+ checkAnswer(result1, expected)
+ checkAnswer(result2, expected)
+
+ // Error is thrown for malformed input.
+ val invalidTimeDF = Seq("invalid_time").toDF("str")
+ checkError(
+ exception = intercept[SparkDateTimeException] {
+ invalidTimeDF.select(to_time(col("str"))).collect()
+ },
+ condition = "CANNOT_PARSE_TIME",
+ parameters = Map("input" -> "'invalid_time'", "format" ->
"'HH:mm:ss.SSSSSS'")
+ )
+ }
+
+ test("SPARK-52883: to_time function with format") {
+ // Input data for the function.
+ val schema = StructType(Seq(
+ StructField("str", StringType, nullable = false),
+ StructField("format", StringType, nullable = false)
+ ))
+ val data = Seq(
+ Row("00.00.00", "HH.mm.ss"),
+ Row("01.02.03.4", "HH.mm.ss.S"),
+ Row("23.59.59.999999", "HH.mm.ss.SSSSSS")
+ )
+ val df = spark.createDataFrame(spark.sparkContext.parallelize(data),
schema)
+
+ // Test the function using both `selectExpr` and `select`.
+ val result1 = df.selectExpr(
+ "to_time(str, format)"
+ )
+ val result2 = df.select(
+ to_time(col("str"), col("format"))
+ )
+ // Check that both methods produce the same result.
+ checkAnswer(result1, result2)
+
+ // Expected output of the function.
+ val expected = Seq(
+ "00:00:00",
+ "01:02:03.4",
+ "23:59:59.999999"
+ ).toDF("timeString").select(col("timeString").cast("time"))
+ // Check that the results match the expected output.
+ checkAnswer(result1, expected)
+ checkAnswer(result2, expected)
+
+ // Error is thrown for malformed input.
+ val invalidTimeDF = Seq(("invalid_time", "HH.mm.ss")).toDF("str", "format")
+ checkError(
+ exception = intercept[SparkDateTimeException] {
+ invalidTimeDF.select(to_time(col("str"), col("format"))).collect()
+ },
+ condition = "CANNOT_PARSE_TIME",
+ parameters = Map("input" -> "'invalid_time'", "format" -> "'HH.mm.ss'")
+ )
+ }
+
+ test("SPARK-52884: try_to_time function without format") {
+ // Input data for the function.
+ val schema = StructType(Seq(
+ StructField("str", StringType)
+ ))
+ val data = Seq(
+ Row("00:00:00"),
+ Row("01:02:03.4"),
+ Row("23:59:59.999999"),
+ Row("invalid_time"),
+ Row(null)
+ )
+ val df = spark.createDataFrame(spark.sparkContext.parallelize(data),
schema)
+
+ // Test the function using both `selectExpr` and `select`.
+ val result1 = df.selectExpr(
+ "try_to_time(str)"
+ )
+ val result2 = df.select(
+ try_to_time(col("str"))
+ )
+ // Check that both methods produce the same result.
+ checkAnswer(result1, result2)
+
+ // Expected output of the function.
+ val expected = Seq(
+ "00:00:00",
+ "01:02:03.4",
+ "23:59:59.999999",
+ null,
+ null
+ ).toDF("timeString").select(col("timeString").cast("time"))
+ // Check that the results match the expected output.
+ checkAnswer(result1, expected)
+ checkAnswer(result2, expected)
+ }
+
+ test("SPARK-52884: try_to_time function with format") {
+ // Input data for the function.
+ val schema = StructType(Seq(
+ StructField("str", StringType),
+ StructField("format", StringType)
+ ))
+ val data = Seq(
+ Row("00.00.00", "HH.mm.ss"),
+ Row("01.02.03.4", "HH.mm.ss.SSS"),
+ Row("23.59.59.999999", "HH.mm.ss.SSSSSS"),
+ Row("invalid_time", "HH.mm.ss"),
+ Row("00.00.00", "invalid_format"),
+ Row("invalid_time", "invalid_format"),
+ Row("00:00:00", "HH.mm.ss"),
+ Row("abc", "HH.mm.ss"),
+ Row("00:00:00", null),
+ Row(null, "HH.mm.ss")
+ )
+ val df = spark.createDataFrame(spark.sparkContext.parallelize(data),
schema)
+
+ // Test the function using both `selectExpr` and `select`.
+ val result1 = df.selectExpr(
+ "try_to_time(str, format)"
+ )
+ val result2 = df.select(
+ try_to_time(col("str"), col("format"))
+ )
+ // Check that both methods produce the same result.
+ checkAnswer(result1, result2)
+
+ // Expected output of the function.
+ val expected = Seq(
+ "00:00:00",
+ "01:02:03.4",
+ "23:59:59.999999",
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ null
+ ).toDF("timeString").select(col("timeString").cast("time"))
+ // Check that the results match the expected output.
+ checkAnswer(result1, expected)
+ checkAnswer(result2, expected)
+ }
}
// This class is used to run the same tests with ANSI mode enabled explicitly.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]