This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fa66d28f0e34 [SPARK-55465][PYTHON] Support GeometryType in
convert_numpy
fa66d28f0e34 is described below
commit fa66d28f0e34342b72706210d685180c084938d9
Author: Fangchen Li <[email protected]>
AuthorDate: Thu Mar 5 15:26:44 2026 +0800
[SPARK-55465][PYTHON] Support GeometryType in convert_numpy
### What changes were proposed in this pull request?
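Support GeometryType in ArrowArrayToPandasConversion.convert_numpy: each non-null Arrow struct value is converted to a Geometry object via Geometry.fromWKB(wkb, srid), and null entries are kept as None.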
### Why are the changes needed?
This is part of the new Arrow-to-pandas converter.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit tests added.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Opus 4.6
Closes #54612 from fangchenli/SPARK-55465-convert-numpy-geometry.
Authored-by: Fangchen Li <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/conversion.py | 7 ++++-
python/pyspark/sql/tests/test_conversion.py | 40 +++++++++++++++++++++++++++++
2 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/sql/conversion.py b/python/pyspark/sql/conversion.py
index aa22594585bb..7e6287fce07e 100644
--- a/python/pyspark/sql/conversion.py
+++ b/python/pyspark/sql/conversion.py
@@ -1607,6 +1607,7 @@ class ArrowArrayToPandasConversion:
UserDefinedType,
VariantType,
GeographyType,
+ GeometryType,
)
if df_for_struct and isinstance(spark_type, StructType):
return all(isinstance(f.dataType, supported_types) for f in
spark_type.fields)
@@ -1725,13 +1726,17 @@ class ArrowArrayToPandasConversion:
series = series.map(
lambda v: Geography.fromWKB(v["wkb"], v["srid"]) if v is not
None else None
)
+ elif isinstance(spark_type, GeometryType):
+ series = arr.to_pandas()
+ series = series.map(
+ lambda v: Geometry.fromWKB(v["wkb"], v["srid"]) if v is not
None else None
+ )
# elif isinstance(
# spark_type,
# (
# ArrayType,
# MapType,
# StructType,
- # GeometryType,
# ),
# ):
# TODO(SPARK-55324): Support complex types
diff --git a/python/pyspark/sql/tests/test_conversion.py
b/python/pyspark/sql/tests/test_conversion.py
index 04c22fb31ae9..261b81a407b5 100644
--- a/python/pyspark/sql/tests/test_conversion.py
+++ b/python/pyspark/sql/tests/test_conversion.py
@@ -34,6 +34,7 @@ from pyspark.sql.types import (
DoubleType,
Geography,
GeographyType,
+ Geometry,
GeometryType,
IntegerType,
LongType,
@@ -696,6 +697,45 @@ class ArrowArrayToPandasConversionTests(unittest.TestCase):
)
self.assertEqual(len(result), 0)
+ def test_geometry_convert_numpy(self):
+ import pyarrow as pa
+
+ geometry_type = pa.struct(
+ [
+ pa.field("srid", pa.int32(), nullable=False),
+ pa.field(
+ "wkb",
+ pa.binary(),
+ nullable=False,
+ metadata={b"geometry": b"true", b"srid": b"0"},
+ ),
+ ]
+ )
+
+ # basic conversion with nulls
+ # POINT(1.0, 2.0) and POINT(17.0, 7.0) in WKB format
+ wkb1 = bytes.fromhex("0101000000000000000000F03F0000000000000040")
+ wkb2 = bytes.fromhex("010100000000000000000031400000000000001c40")
+ arr = pa.array(
+ [
+ {"srid": 0, "wkb": wkb1},
+ None,
+ {"srid": 0, "wkb": wkb2},
+ ],
+ type=geometry_type,
+ )
+ result = ArrowArrayToPandasConversion.convert_numpy(arr,
GeometryType(0), ser_name="g")
+ self.assertEqual(result.iloc[0], Geometry(wkb1, 0))
+ self.assertIsNone(result.iloc[1])
+ self.assertEqual(result.iloc[2], Geometry(wkb2, 0))
+ self.assertEqual(result.name, "g")
+
+ # empty
+ result = ArrowArrayToPandasConversion.convert_numpy(
+ pa.array([], type=geometry_type), GeometryType(0)
+ )
+ self.assertEqual(len(result), 0)
+
if __name__ == "__main__":
from pyspark.testing import main
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]