This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c5f007b11630 [SPARK-53829][PYTHON] Support `datetime.time` in column operators
c5f007b11630 is described below
commit c5f007b11630c5fee7824b49f9a421ffc5589565
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Tue Oct 7 21:12:06 2025 -0700
[SPARK-53829][PYTHON] Support `datetime.time` in column operators
### What changes were proposed in this pull request?
Support `datetime.time` in column operators
### Why are the changes needed?
to be consistent with other datetime types
### Does this PR introduce _any_ user-facing change?
yes, this will work
```py
df.select("i").where(sf.col("t") < datetime.time(3, 0, 0))
```
### How was this patch tested?
new tests
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #52540 from zhengruifeng/py_col_time.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
python/pyspark/sql/connect/_typing.py | 2 +-
python/pyspark/sql/connect/column.py | 17 ++++++++++++++---
python/pyspark/sql/tests/test_column.py | 20 ++++++++++++++++++++
3 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql/connect/_typing.py b/python/pyspark/sql/connect/_typing.py
index efb3e0e8eb50..7c8351185d6e 100644
--- a/python/pyspark/sql/connect/_typing.py
+++ b/python/pyspark/sql/connect/_typing.py
@@ -39,7 +39,7 @@ LiteralType = PrimitiveType
DecimalLiteral = decimal.Decimal
-DateTimeLiteral = Union[datetime.datetime, datetime.date]
+DateTimeLiteral = Union[datetime.date, datetime.time, datetime.datetime]
DataTypeOrString = Union[DataType, str]
diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index d6ed62ba4a52..78960d979522 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -84,10 +84,11 @@ def _bin_op(
float,
int,
str,
- datetime.datetime,
datetime.date,
- decimal.Decimal,
+ datetime.time,
+ datetime.datetime,
datetime.timedelta,
+ decimal.Decimal,
),
):
other_expr = LiteralExpression._from_value(other)
@@ -384,7 +385,17 @@ class Column(ParentColumn):
def __eq__(self, other: Any) -> ParentColumn: # type: ignore[override]
other = enum_to_value(other)
if other is None or isinstance(
- other, (bool, float, int, str, datetime.datetime, datetime.date, decimal.Decimal)
+ other,
+ (
+ bool,
+ float,
+ int,
+ str,
+ datetime.date,
+ datetime.time,
+ datetime.datetime,
+ decimal.Decimal,
+ ),
):
other_expr = LiteralExpression._from_value(other)
else:
diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py
index e2ebfd3c092c..ae9010fbc6d4 100644
--- a/python/pyspark/sql/tests/test_column.py
+++ b/python/pyspark/sql/tests/test_column.py
@@ -113,6 +113,26 @@ class ColumnTestsMixin:
ValueError, "Cannot apply 'in' operator against a column", lambda: 1 in cs
)
+ def test_column_date_time_op(self):
+ query = """
+ SELECT * FROM VALUES
+ (TIME('00:00:00'), 1),
+ (TIME('01:02:03'), 2),
+ (TIME('11:12:13'), 3)
+ AS tab(t, i)
+ """
+
+ df = self.spark.sql(query)
+
+ res1 = df.select("i").where(sf.col("t") < datetime.time(3, 0, 0))
+ self.assertEqual([r.i for r in res1.collect()], [1, 2])
+
+ res2 = df.select("i").where(sf.col("t") > datetime.time(1, 0, 0))
+ self.assertEqual([r.i for r in res2.collect()], [2, 3])
+
+ res3 = df.select("i").where(sf.col("t") == datetime.time(0, 0, 0))
+ self.assertEqual([r.i for r in res3.collect()], [1])
+
def test_column_accessor(self):
from pyspark.sql.functions import col
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]