This is an automated email from the ASF dual-hosted git repository.
xinrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6ab297b0fc7d [SPARK-52519][PS] Enable divide-by-zero for numeric floordiv with ANSI enabled
6ab297b0fc7d is described below
commit 6ab297b0fc7d13da24154c958b946a34a0c552b7
Author: Xinrong Meng <[email protected]>
AuthorDate: Fri Jun 20 13:38:40 2025 -0700
[SPARK-52519][PS] Enable divide-by-zero for numeric floordiv with ANSI enabled
### What changes were proposed in this pull request?
Enable divide-by-zero for numeric floordiv when ANSI mode is enabled, by routing the fallback division through `F.try_divide`.
### Why are the changes needed?
Ensure pandas API on Spark behaves correctly with ANSI mode enabled.
Part of https://issues.apache.org/jira/browse/SPARK-52169.
### Does this PR introduce _any_ user-facing change?
Yes.
```py
>>> spark.conf.get("spark.sql.ansi.enabled")
'true'
>>> ps.set_option("compute.fail_on_ansi_mode", False)
>>> ps.set_option("compute.ansi_mode_support", True)
>>> ps.Series([1, 2]) // 0
0 inf
1 inf
dtype: float64
>>> ps.Series([1, 2]) // ps.Series([0, 0])
0 inf
1 inf
dtype: float64
```
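For context, `F.try_divide` (available in `pyspark.sql.functions` since Spark 3.5) returns NULL instead of raising a DIVIDE_BY_ZERO error when the divisor is zero. A minimal illustrative sketch, not part of this commit, assuming an active SparkSession `spark` with `spark.sql.ansi.enabled=true`:
```py
# Illustrative only, not part of this commit.
# Assumes an active SparkSession named `spark` with spark.sql.ansi.enabled=true.
from pyspark.sql import functions as F

df = spark.range(1).select(
    F.try_divide(F.lit(1), F.lit(0)).alias("safe_div")
)
print(df.collect())  # [Row(safe_div=None)] -- NULL rather than a DIVIDE_BY_ZERO error
```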
### How was this patch tested?
Unit tests.
```
(dev3.10) spark (num_floordiv) % SPARK_ANSI_SQL_MODE=true ./python/run-tests --python-executables=python3.10 --testnames "pyspark.pandas.tests.computation.test_binary_ops FrameBinaryOpsTests.test_binary_operator_floordiv"
...
Tests passed in 6 seconds
(dev3.10) spark (num_floordiv) % SPARK_ANSI_SQL_MODE=false ./python/run-tests --python-executables=python3.10 --testnames "pyspark.pandas.tests.computation.test_binary_ops FrameBinaryOpsTests.test_binary_operator_floordiv"
...
Tests passed in 4 seconds
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #51209 from xinrong-meng/num_floordiv.
Lead-authored-by: Xinrong Meng <[email protected]>
Co-authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Xinrong Meng <[email protected]>
---
python/pyspark/pandas/data_type_ops/num_ops.py | 24 +++++++++++++++++++---
.../pandas/tests/computation/test_binary_ops.py | 6 +++++-
2 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index 34d313af8232..06622ef71d88 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -16,7 +16,7 @@
#
import numbers
-from typing import Any, Union
+from typing import Any, Union, Callable
import numpy as np
import pandas as pd
@@ -271,13 +271,22 @@ class IntegralOps(NumericOps):
_sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Floor division can not be applied to given types.")
+ spark_session = left._internal.spark_frame.sparkSession
+ use_try_divide = is_ansi_mode_enabled(spark_session)
+
+ def fallback_div(x: PySparkColumn, y: PySparkColumn) -> PySparkColumn:
+ return x.__div__(y)
+
+ safe_div: Callable[[PySparkColumn, PySparkColumn], PySparkColumn] = (
+ F.try_divide if use_try_divide else fallback_div
+ )
def floordiv(left: PySparkColumn, right: Any) -> PySparkColumn:
return F.when(F.lit(right is np.nan), np.nan).otherwise(
F.when(
F.lit(right != 0) | F.lit(right).isNull(),
F.floor(left.__div__(right)),
- ).otherwise(F.lit(np.inf).__div__(left))
+ ).otherwise(safe_div(F.lit(np.inf), left))
)
right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
@@ -369,6 +378,15 @@ class FractionalOps(NumericOps):
_sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Floor division can not be applied to given types.")
+ spark_session = left._internal.spark_frame.sparkSession
+ use_try_divide = is_ansi_mode_enabled(spark_session)
+
+ def fallback_div(x: PySparkColumn, y: PySparkColumn) -> PySparkColumn:
+ return x.__div__(y)
+
+ safe_div: Callable[[PySparkColumn, PySparkColumn], PySparkColumn] = (
+ F.try_divide if use_try_divide else fallback_div
+ )
def floordiv(left: PySparkColumn, right: Any) -> PySparkColumn:
return F.when(F.lit(right is np.nan), np.nan).otherwise(
@@ -377,7 +395,7 @@ class FractionalOps(NumericOps):
F.floor(left.__div__(right)),
).otherwise(
F.when(F.lit(left == np.inf) | F.lit(left == -np.inf), left).otherwise(
- F.lit(np.inf).__div__(left)
+ safe_div(F.lit(np.inf), left)
)
)
)
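In both hunks above, the `.otherwise(...)` branch computes `inf / left` when the right operand is zero; if `left` is itself zero, that inner division by zero would raise DIVIDE_BY_ZERO under ANSI mode, so it is now routed through `F.try_divide`. A simplified standalone sketch of the helper-selection pattern (names mirror `num_ops.py`; this is an illustration, not the exact code):
```py
# Simplified sketch of the safe_div selection added above; illustrative only.
from typing import Callable
from pyspark.sql import Column as PySparkColumn
from pyspark.sql import functions as F

def pick_safe_div(ansi_enabled: bool) -> Callable[[PySparkColumn, PySparkColumn], PySparkColumn]:
    """Return F.try_divide under ANSI mode, plain column division otherwise."""
    def fallback_div(x: PySparkColumn, y: PySparkColumn) -> PySparkColumn:
        # Regular '/', which already yields NULL for a zero divisor when ANSI mode is off.
        return x.__div__(y)
    return F.try_divide if ansi_enabled else fallback_div
```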
diff --git a/python/pyspark/pandas/tests/computation/test_binary_ops.py b/python/pyspark/pandas/tests/computation/test_binary_ops.py
index 3c9b7293d5d5..cda9958ad3de 100644
--- a/python/pyspark/pandas/tests/computation/test_binary_ops.py
+++ b/python/pyspark/pandas/tests/computation/test_binary_ops.py
@@ -208,7 +208,11 @@ class FrameBinaryOpsMixin:
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 1 / psdf["a"])
def test_binary_operator_floordiv(self):
- psdf = ps.DataFrame({"a": ["x"], "b": [1]})
+ pdf = pd.DataFrame({"a": ["x"], "b": [1], "c": [1.0], "d": [0]})
+ psdf = ps.from_pandas(pdf)
+ self.assert_eq(pdf["b"] // 0, psdf["b"] // 0)
+ self.assert_eq(pdf["c"] // 0, psdf["c"] // 0)
+ self.assert_eq(pdf["d"] // 0, psdf["d"] // 0)
ks_err_msg = "Floor division can not be applied to strings"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] //
psdf["b"])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]