This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e14f5f959d4 [SPARK-53111][SQL][PYTHON][CONNECT] Implement the time_diff function in PySpark
9e14f5f959d4 is described below

commit 9e14f5f959d4abba4d713a4165d97b59c431123c
Author: Uros Bojanic <[email protected]>
AuthorDate: Wed Oct 15 15:52:55 2025 +0800

    [SPARK-53111][SQL][PYTHON][CONNECT] Implement the time_diff function in PySpark
    
    ### What changes were proposed in this pull request?
    Implement the `time_diff` function in PySpark & PySpark Connect API.
    
    ### Why are the changes needed?
    Expand API support for the `time_diff` function.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, the new `time_diff` function is now available in the Python API.
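    
    For illustration, a minimal usage sketch of the new API (this assumes an active
    SparkSession named `spark` built with this change, and simply mirrors the docstring
    example added below):
    
    ```python
    # Illustrative sketch only: call the new time_diff function.
    from pyspark.sql import functions as sf
    
    df = (
        spark.createDataFrame([("13:08:15", "21:30:28")], ["start", "end"])
        .withColumn("start", sf.col("start").cast("time"))
        .withColumn("end", sf.col("end").cast("time"))
    )
    # Number of whole hours between the two times; expected result is 8.
    df.select(sf.time_diff(sf.lit("HOUR"), "start", "end")).show()
    ```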
    
    ### How was this patch tested?
    Added appropriate Python function tests.
    
    - pyspark.sql.tests.test_functions
    - pyspark.sql.tests.connect.test_parity_functions
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #51829 from uros-db/python-time_diff.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Ruifeng Zheng <[email protected]>
---
 .../source/reference/pyspark.sql/functions.rst     |  1 +
 python/pyspark/sql/connect/functions/builtin.py    |  7 ++++
 python/pyspark/sql/functions/__init__.py           |  1 +
 python/pyspark/sql/functions/builtin.py            | 43 ++++++++++++++++++++++
 python/pyspark/sql/tests/test_functions.py         | 18 +++++++--
 5 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst
index 003fdc0a00b5..e4175707aecd 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -299,6 +299,7 @@ Date and Timestamp Functions
     timestamp_micros
     timestamp_millis
     timestamp_seconds
+    time_diff
     time_trunc
     to_date
     to_time
diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py
index aee4a7572a35..2668b7a526fd 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -3650,6 +3650,13 @@ def timestamp_seconds(col: "ColumnOrName") -> Column:
 timestamp_seconds.__doc__ = pysparkfuncs.timestamp_seconds.__doc__
 
 
+def time_diff(unit: "ColumnOrName", start: "ColumnOrName", end: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("time_diff", unit, start, end)
+
+
+time_diff.__doc__ = pysparkfuncs.time_diff.__doc__
+
+
 def time_trunc(unit: "ColumnOrName", time: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("time_trunc", unit, time)
 
diff --git a/python/pyspark/sql/functions/__init__.py b/python/pyspark/sql/functions/__init__.py
index 7c3f4cbc1a4f..e1b320c98f7f 100644
--- a/python/pyspark/sql/functions/__init__.py
+++ b/python/pyspark/sql/functions/__init__.py
@@ -248,6 +248,7 @@ __all__ = [  # noqa: F405
     "timestamp_micros",
     "timestamp_millis",
     "timestamp_seconds",
+    "time_diff",
     "time_trunc",
     "to_date",
     "to_time",
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 0dd0aea7bced..24baace54621 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -12710,6 +12710,49 @@ def timestamp_seconds(col: "ColumnOrName") -> Column:
     return _invoke_function_over_columns("timestamp_seconds", col)
 
 
+@_try_remote_functions
+def time_diff(unit: "ColumnOrName", start: "ColumnOrName", end: "ColumnOrName") -> Column:
+    """
+    Returns the difference between two times, measured in the specified unit.
+
+    .. versionadded:: 4.1.0
+
+    Parameters
+    ----------
+    unit : :class:`~pyspark.sql.Column` or column name
+        The unit in which to measure the difference. Supported units are: "HOUR", "MINUTE", "SECOND",
+        "MILLISECOND", and "MICROSECOND". The unit is case-insensitive.
+    start : :class:`~pyspark.sql.Column` or column name
+        A starting time.
+    end : :class:`~pyspark.sql.Column` or column name
+        An ending time.
+
+    Returns
+    -------
+    :class:`~pyspark.sql.Column`
+        The difference between two times, in the specified units.
+
+    See Also
+    --------
+    :meth:`pyspark.sql.functions.date_diff`
+    :meth:`pyspark.sql.functions.timestamp_diff`
+
+    Examples
+    --------
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame(
+    ...     [("HOUR", "13:08:15", "21:30:28")], ['unit', 'start', 'end']).withColumn("start",
+    ...     sf.col("start").cast("time")).withColumn("end", sf.col("end").cast("time"))
+    >>> df.select('*', sf.time_diff('unit', 'start', 'end')).show()
+    +----+--------+--------+---------------------------+
+    |unit|   start|     end|time_diff(unit, start, end)|
+    +----+--------+--------+---------------------------+
+    |HOUR|13:08:15|21:30:28|                          8|
+    +----+--------+--------+---------------------------+
+    """
+    return _invoke_function_over_columns("time_diff", unit, start, end)
+
+
 @_try_remote_functions
 def time_trunc(unit: "ColumnOrName", time: "ColumnOrName") -> Column:
     """
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 91e519c6f8c7..41c07a61eb1e 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -81,10 +81,7 @@ class FunctionsTestsMixin:
         missing_in_py = jvm_fn_set.difference(py_fn_set)
 
         # Functions that we expect to be missing in python until they are added to pyspark
-        expected_missing_in_py = set(
-            # TODO(SPARK-53108): Implement the time_diff function in Python
-            ["time_diff"]
-        )
+        expected_missing_in_py = set()
 
         self.assertEqual(
             expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected"
@@ -403,6 +400,19 @@ class FunctionsTestsMixin:
         rndn2 = df.select("key", F.randn(0)).collect()
         self.assertEqual(sorted(rndn1), sorted(rndn2))
 
+    def test_time_diff(self):
+        # SPARK-53111: test the time_diff function.
+        df = self.spark.range(1).select(
+            F.lit("hour").alias("unit"),
+            F.lit(datetime.time(20, 30, 29)).alias("start"),
+            F.lit(datetime.time(21, 30, 29)).alias("end"),
+        )
+        result = 1
+        row_from_col = df.select(F.time_diff(df.unit, df.start, df.end)).first()
+        self.assertEqual(row_from_col[0], result)
+        row_from_name = df.select(F.time_diff("unit", "start", "end")).first()
+        self.assertEqual(row_from_name[0], result)
+
     def test_time_trunc(self):
         # SPARK-53110: test the time_trunc function.
         df = self.spark.range(1).select(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
