This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1b99a561c8e2 [SPARK-55539][GEO][SQL] Allow casting from GeometryType
to GeographyType
1b99a561c8e2 is described below
commit 1b99a561c8e2afdbb3af5933e227b81ae44cfd1a
Author: Uros Bojanic <[email protected]>
AuthorDate: Thu Feb 26 19:03:44 2026 +0800
[SPARK-55539][GEO][SQL] Allow casting from GeometryType to GeographyType
### What changes were proposed in this pull request?
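This PR allows casting `GEOMETRY` to `GEOGRAPHY` if they have the same
geographic SRID. At runtime, the cast additionally fails if the value's SRID
is not a supported geographic SRID, or if its coordinates fall outside
geography bounds.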
### Why are the changes needed?
Enable explicit casting between geospatial types.
### Does this PR introduce _any_ user-facing change?
Yes, casting `GEOMETRY` to `GEOGRAPHY` is now allowed.
### How was this patch tested?
Added new unit tests:
- `StUtilsSuite`
- `CastSuiteBase`
Added new e2e SQL tests:
- `st-functions`
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54331 from uros-db/geo-cast-geom_geog.
Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/catalyst/util/STUtils.java | 19 +++++++++
.../spark/sql/catalyst/expressions/Cast.scala | 31 ++++++++++++--
.../sql/catalyst/expressions/CastSuiteBase.scala | 32 +++++++++++++++
.../spark/sql/catalyst/util/StUtilsSuite.java | 30 ++++++++++++++
.../analyzer-results/st-functions.sql.out | 36 ++++++++++++++++
.../resources/sql-tests/inputs/st-functions.sql | 7 ++++
.../sql-tests/results/st-functions.sql.out | 48 ++++++++++++++++++++++
7 files changed, 200 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
index c02192965fd7..4026bbb2c22d 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.catalyst.util;
+import org.apache.spark.sql.catalyst.util.geo.WkbParseException;
+import org.apache.spark.sql.catalyst.util.geo.WkbReader;
import org.apache.spark.sql.errors.QueryExecutionErrors;
import org.apache.spark.sql.types.GeographyType;
import org.apache.spark.sql.types.GeometryType;
@@ -52,6 +54,23 @@ public final class STUtils {
/** Geospatial type casting utility methods. */
+ // Cast geometry to geography.
+ public static GeographyVal geometryToGeography(GeometryVal geometryVal) {
+ // We first need to check whether the input geometry has a geographic SRID.
+ int srid = stSrid(geometryVal);
+ if(!GeographyType.isSridSupported(srid)) {
+ throw QueryExecutionErrors.stInvalidSridValueError(String.valueOf(srid));
+ }
+ // We also need to check whether the input geometry has coordinates in geography bounds.
+ try {
+ byte[] wkb = stAsBinary(geometryVal);
+ new WkbReader(true).read(wkb, srid);
+ } catch (WkbParseException e) {
+ throw QueryExecutionErrors.wkbParseError(e.getParseError(), e.getPosition());
+ }
+ return toPhysVal(Geography.fromBytes(geometryVal.getBytes()));
+ }
+
// Cast geography to geometry.
public static GeometryVal geographyToGeometry(GeographyVal geographyVal) {
// Geographic SRID is always a valid SRID for geometry, so we don't need
to check it.
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 849f3b8a0d1b..c51d3508d04a 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -38,7 +38,7 @@ import
org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte,
import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{GeographyVal, UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String, VariantVal}
import org.apache.spark.unsafe.types.UTF8String.{IntWrapper, LongWrapper}
import org.apache.spark.util.ArrayImplicits._
@@ -173,6 +173,9 @@ object Cast extends QueryErrorsBase {
// Casts from concrete GEOMETRY(srid) to mixed GEOMETRY(ANY) is allowed.
case (gt1: GeometryType, gt2: GeometryType) if !gt1.isMixedSrid &&
gt2.isMixedSrid =>
true
+ // Casting from GEOMETRY to GEOGRAPHY with the same SRID is allowed.
+ case (geom: GeometryType, geog: GeographyType) if geom.srid == geog.srid =>
+ true
case _ => false
}
@@ -309,6 +312,9 @@ object Cast extends QueryErrorsBase {
// Casts from concrete GEOMETRY(srid) to mixed GEOMETRY(ANY) is allowed.
case (gt1: GeometryType, gt2: GeometryType) if !gt1.isMixedSrid &&
gt2.isMixedSrid =>
true
+ // Casting from GEOMETRY to GEOGRAPHY with the same SRID is allowed.
+ case (geom: GeometryType, geog: GeographyType) if geom.srid == geog.srid =>
+ true
case _ => false
}
@@ -1200,6 +1206,14 @@ case class Cast(
b => numeric.toFloat(b)
}
+ // GeographyConverter
+ private[this] def castToGeography(from: DataType): Any => Any = from match {
+ case _: GeographyType =>
+ identity
+ case _: GeometryType =>
+ buildCast[GeometryVal](_, STUtils.geometryToGeography)
+ }
+
// GeometryConverter
private[this] def castToGeometry(from: DataType): Any => Any = from match {
case _: GeographyType =>
@@ -1287,7 +1301,7 @@ case class Cast(
case FloatType => castToFloat(from)
case LongType => castToLong(from)
case DoubleType => castToDouble(from)
- case _: GeographyType => identity
+ case _: GeographyType => castToGeography(from)
case _: GeometryType => castToGeometry(from)
case array: ArrayType =>
castArray(from.asInstanceOf[ArrayType].elementType,
array.elementType)
@@ -1397,7 +1411,7 @@ case class Cast(
case FloatType => castToFloatCode(from, ctx)
case LongType => castToLongCode(from, ctx)
case DoubleType => castToDoubleCode(from, ctx)
- case _: GeographyType => (c, evPrim, _) => code"$evPrim = $c;"
+ case _: GeographyType => castToGeographyCode(from)
case _: GeometryType => castToGeometryCode(from)
case array: ArrayType =>
@@ -2245,6 +2259,17 @@ case class Cast(
}
}
+ private[this] def castToGeographyCode(from: DataType): CastFunction = {
+ from match {
+ case _: GeographyType =>
+ (c, evPrim, _) =>
+ code"$evPrim = $c;"
+ case _: GeometryType =>
+ (c, evPrim, _) =>
+ code"$evPrim = org.apache.spark.sql.catalyst.util.STUtils.geometryToGeography($c);"
+ }
+ }
+
private[this] def castToGeometryCode(from: DataType): CastFunction = {
from match {
case _: GeographyType =>
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index e18a489d36f3..e888432ef91e 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -1544,6 +1544,38 @@ abstract class CastSuiteBase extends SparkFunSuite with
ExpressionEvalHelper {
}
}
+ test("Casting GeometryType to GeographyType") {
+ // Casting from GEOMETRY to GEOGRAPHY is only allowed if the SRIDs are the
same.
+
+ // Valid cast test cases.
+ val canAnsiCastTestCases: Seq[(DataType, DataType)] = Seq(
+ (GeometryType(4326), GeographyType(4326)),
+ (GeometryType("ANY"), GeographyType("ANY"))
+ )
+ // Iterate over the test cases and verify casting.
+ canAnsiCastTestCases.foreach { case (fromType, toType) =>
+ // Cast can be performed from `fromType` to `toType`.
+ assert(Cast.canCast(fromType, toType))
+ assert(Cast.canAnsiCast(fromType, toType))
+ }
+
+ // Invalid cast test cases.
+ val cannotAnsiCastTestCases: Seq[(DataType, DataType)] = Seq(
+ (GeometryType(0), GeographyType(4326)),
+ (GeometryType(3857), GeographyType(4326)),
+ (GeometryType("ANY"), GeographyType(4326)),
+ (GeometryType(0), GeographyType("ANY")),
+ (GeometryType(3857), GeographyType("ANY")),
+ (GeometryType(4326), GeographyType("ANY"))
+ )
+ // Iterate over the test cases and verify casting.
+ cannotAnsiCastTestCases.foreach { case (fromType, toType) =>
+ // Cast cannot be performed from `fromType` to `toType`.
+ assert(!Cast.canCast(fromType, toType))
+ assert(!Cast.canAnsiCast(fromType, toType))
+ }
+ }
+
test("Casting GeometryType to GeometryType") {
// Casting from fixed SRID GEOMETRY(<srid>) to mixed SRID GEOMETRY(ANY) is
always allowed.
// Type casting is always safe in this direction, so no additional
constraints are imposed.
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
index e5e6d32ab99f..aa1b4735cf62 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
@@ -45,6 +45,7 @@ class STUtilsSuite {
// A sample Geometry byte array for testing purposes, representing a POINT(1
2) with SRID 0.
private final int testGeometrySrid = 0;
private final byte[] testGeometryBytes;
+ private final byte[] testGeometry4326Bytes;
// Common constants used in geo value construction.
private final ByteOrder end = Geo.DEFAULT_ENDIANNESS;
@@ -63,10 +64,39 @@ class STUtilsSuite {
testGeometryBytes = new byte[sridLen + wkbLen];
System.arraycopy(geomSrid, 0, testGeometryBytes, 0, sridLen);
System.arraycopy(testWkb, 0, testGeometryBytes, sridLen, wkbLen);
+ testGeometry4326Bytes = new byte[sridLen + wkbLen];
+ System.arraycopy(geogSrid, 0, testGeometry4326Bytes, 0, sridLen);
+ System.arraycopy(testWkb, 0, testGeometry4326Bytes, sridLen, wkbLen);
}
/** Geospatial type casting utility methods. */
+ @Test
+ void testGeometryToGeography() {
+ GeometryVal geometryVal = GeometryVal.fromBytes(testGeometry4326Bytes);
+ GeographyVal geographyVal = STUtils.geometryToGeography(geometryVal);
+ assertNotNull(geographyVal);
+ assertArrayEquals(geometryVal.getBytes(), geographyVal.getBytes());
+ // Non-geographic SRID should not be allowed for geometry to geography
casting.
+ SparkIllegalArgumentException sridException = assertThrows(
+ SparkIllegalArgumentException.class,
+ () ->
STUtils.geometryToGeography(GeometryVal.fromBytes(testGeometryBytes)));
+ assertEquals("ST_INVALID_SRID_VALUE", sridException.getCondition());
+ // Coordinates outside geography bounds should not be allowed even with a
valid SRID.
+ ByteBuffer oobWkbBuf =
ByteBuffer.allocate(21).order(ByteOrder.LITTLE_ENDIAN);
+ oobWkbBuf.put((byte) 0x01).putInt(1).putDouble(200.0).putDouble(100.0);
+ // For example: POINT(200 100) geometry with SRID 4326.
+ byte[] oobWkb = oobWkbBuf.array();
+ byte[] oobGeomBytes = new byte[sridLen + oobWkb.length];
+ byte[] srid4326 =
ByteBuffer.allocate(sridLen).order(end).putInt(testGeographySrid).array();
+ System.arraycopy(srid4326, 0, oobGeomBytes, 0, sridLen);
+ System.arraycopy(oobWkb, 0, oobGeomBytes, sridLen, oobWkb.length);
+ SparkIllegalArgumentException coordinateException = assertThrows(
+ SparkIllegalArgumentException.class,
+ () -> STUtils.geometryToGeography(GeometryVal.fromBytes(oobGeomBytes)));
+ assertEquals("WKB_PARSE_ERROR", coordinateException.getCondition());
+ }
+
@Test
void testGeographyToGeometry() {
GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index 7566e69392fd..34969f43e1f7 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -308,6 +308,42 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}
+-- !query
+SELECT
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
4326) AS GEOGRAPHY(4326)))) AS result
+-- !query analysis
+Project
[hex(st_asbinary(cast(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040,
4326) as geography(4326)))) AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOGRAPHY(4326)) AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "sqlExpr" :
"\"CAST(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0) AS
GEOGRAPHY(4326))\"",
+ "srcType" : "\"GEOMETRY(0)\"",
+ "targetType" : "\"GEOGRAPHY(4326)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 93,
+ "fragment" :
"CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOGRAPHY(4326))"
+ } ]
+}
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'010100000000000000000069400000000000005940',
4326) AS GEOGRAPHY(4326)) AS result
+-- !query analysis
+Project [cast(st_geomfromwkb(0x010100000000000000000069400000000000005940,
4326) as geography(4326)) AS result#x]
++- OneRowRelation
+
+
-- !query
SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index f3543837ddfc..0db835bc7e85 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -58,6 +58,13 @@ SELECT
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f000000000
-- Casting GEOMETRY(ANY) to GEOMETRY(<srid>) is not allowed.
SELECT
CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOMETRY(ANY)
AS GEOMETRY(4326)) AS result;
+-- Casting GEOMETRY to GEOGRAPHY is allowed only if SRIDs match.
+SELECT
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
4326) AS GEOGRAPHY(4326)))) AS result;
+-- Error handling: mismatched SRIDs.
+SELECT CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOGRAPHY(4326)) AS result;
+-- Error handling: coordinates out of geographic bounds.
+SELECT CAST(ST_GeomFromWKB(X'010100000000000000000069400000000000005940',
4326) AS GEOGRAPHY(4326)) AS result;
+
---- Geospatial type coercion
-- Array
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index 03f5728d7d3a..11ca99bc2304 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -345,6 +345,54 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}
+-- !query
+SELECT
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
4326) AS GEOGRAPHY(4326)))) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOGRAPHY(4326)) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+ "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+ "sqlState" : "42K09",
+ "messageParameters" : {
+ "sqlExpr" :
"\"CAST(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0) AS
GEOGRAPHY(4326))\"",
+ "srcType" : "\"GEOMETRY(0)\"",
+ "targetType" : "\"GEOGRAPHY(4326)\""
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 8,
+ "stopIndex" : 93,
+ "fragment" :
"CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOGRAPHY(4326))"
+ } ]
+}
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'010100000000000000000069400000000000005940',
4326) AS GEOGRAPHY(4326)) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+ "errorClass" : "WKB_PARSE_ERROR",
+ "sqlState" : "22023",
+ "messageParameters" : {
+ "parseError" : "Invalid coordinate value found",
+ "pos" : "5"
+ }
+}
+
+
-- !query
SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
-- !query schema
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]