This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b99a561c8e2 [SPARK-55539][GEO][SQL] Allow casting from GeometryType 
to GeographyType
1b99a561c8e2 is described below

commit 1b99a561c8e2afdbb3af5933e227b81ae44cfd1a
Author: Uros Bojanic <[email protected]>
AuthorDate: Thu Feb 26 19:03:44 2026 +0800

    [SPARK-55539][GEO][SQL] Allow casting from GeometryType to GeographyType
    
    ### What changes were proposed in this pull request?
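    This PR allows casting `GEOMETRY` to `GEOGRAPHY` when the source and target 
types have the same SRID. At runtime, the cast additionally verifies that the 
value's SRID is a supported geographic SRID and that its coordinates are within 
geography bounds.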
    
    ### Why are the changes needed?
    Enable explicit casting between geospatial types.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, casting `GEOMETRY` to `GEOGRAPHY` is now allowed.
    
    ### How was this patch tested?
    Added new unit tests:
    - `StUtilsSuite`
    - `CastSuiteBase`
    
    Added new e2e SQL tests:
    - `st-functions`
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #54331 from uros-db/geo-cast-geom_geog.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../apache/spark/sql/catalyst/util/STUtils.java    | 19 +++++++++
 .../spark/sql/catalyst/expressions/Cast.scala      | 31 ++++++++++++--
 .../sql/catalyst/expressions/CastSuiteBase.scala   | 32 +++++++++++++++
 .../spark/sql/catalyst/util/StUtilsSuite.java      | 30 ++++++++++++++
 .../analyzer-results/st-functions.sql.out          | 36 ++++++++++++++++
 .../resources/sql-tests/inputs/st-functions.sql    |  7 ++++
 .../sql-tests/results/st-functions.sql.out         | 48 ++++++++++++++++++++++
 7 files changed, 200 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
index c02192965fd7..4026bbb2c22d 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
@@ -16,6 +16,8 @@
  */
 package org.apache.spark.sql.catalyst.util;
 
+import org.apache.spark.sql.catalyst.util.geo.WkbParseException;
+import org.apache.spark.sql.catalyst.util.geo.WkbReader;
 import org.apache.spark.sql.errors.QueryExecutionErrors;
 import org.apache.spark.sql.types.GeographyType;
 import org.apache.spark.sql.types.GeometryType;
@@ -52,6 +54,23 @@ public final class STUtils {
 
   /** Geospatial type casting utility methods. */
 
+  // Cast geometry to geography.
+  public static GeographyVal geometryToGeography(GeometryVal geometryVal) {
+    // We first need to check whether the input geometry has a geographic SRID.
+    int srid = stSrid(geometryVal);
+    if(!GeographyType.isSridSupported(srid)) {
+      throw QueryExecutionErrors.stInvalidSridValueError(String.valueOf(srid));
+    }
+    // We also need to check whether the input geometry has coordinates in 
geography bounds.
+    try {
+      byte[] wkb = stAsBinary(geometryVal);
+      new WkbReader(true).read(wkb, srid);
+    } catch (WkbParseException e) {
+      throw QueryExecutionErrors.wkbParseError(e.getParseError(), 
e.getPosition());
+    }
+    return toPhysVal(Geography.fromBytes(geometryVal.getBytes()));
+  }
+
   // Cast geography to geometry.
   public static GeometryVal geographyToGeometry(GeographyVal geographyVal) {
     // Geographic SRID is always a valid SRID for geometry, so we don't need 
to check it.
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 849f3b8a0d1b..c51d3508d04a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -38,7 +38,7 @@ import 
org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte,
 import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{GeographyVal, UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String, 
VariantVal}
 import org.apache.spark.unsafe.types.UTF8String.{IntWrapper, LongWrapper}
 import org.apache.spark.util.ArrayImplicits._
 
@@ -173,6 +173,9 @@ object Cast extends QueryErrorsBase {
     // Casts from concrete GEOMETRY(srid) to mixed GEOMETRY(ANY) is allowed.
     case (gt1: GeometryType, gt2: GeometryType) if !gt1.isMixedSrid && 
gt2.isMixedSrid =>
       true
+    // Casting from GEOMETRY to GEOGRAPHY with the same SRID is allowed.
+    case (geom: GeometryType, geog: GeographyType) if geom.srid == geog.srid =>
+      true
 
     case _ => false
   }
@@ -309,6 +312,9 @@ object Cast extends QueryErrorsBase {
     // Casts from concrete GEOMETRY(srid) to mixed GEOMETRY(ANY) is allowed.
     case (gt1: GeometryType, gt2: GeometryType) if !gt1.isMixedSrid && 
gt2.isMixedSrid =>
       true
+    // Casting from GEOMETRY to GEOGRAPHY with the same SRID is allowed.
+    case (geom: GeometryType, geog: GeographyType) if geom.srid == geog.srid =>
+      true
 
     case _ => false
   }
@@ -1200,6 +1206,14 @@ case class Cast(
       b => numeric.toFloat(b)
   }
 
+  // GeographyConverter
+  private[this] def castToGeography(from: DataType): Any => Any = from match {
+    case _: GeographyType =>
+      identity
+    case _: GeometryType =>
+      buildCast[GeometryVal](_, STUtils.geometryToGeography)
+  }
+
   // GeometryConverter
   private[this] def castToGeometry(from: DataType): Any => Any = from match {
     case _: GeographyType =>
@@ -1287,7 +1301,7 @@ case class Cast(
         case FloatType => castToFloat(from)
         case LongType => castToLong(from)
         case DoubleType => castToDouble(from)
-        case _: GeographyType => identity
+        case _: GeographyType => castToGeography(from)
         case _: GeometryType => castToGeometry(from)
         case array: ArrayType =>
           castArray(from.asInstanceOf[ArrayType].elementType, 
array.elementType)
@@ -1397,7 +1411,7 @@ case class Cast(
     case FloatType => castToFloatCode(from, ctx)
     case LongType => castToLongCode(from, ctx)
     case DoubleType => castToDoubleCode(from, ctx)
-    case _: GeographyType => (c, evPrim, _) => code"$evPrim = $c;"
+    case _: GeographyType => castToGeographyCode(from)
     case _: GeometryType => castToGeometryCode(from)
 
     case array: ArrayType =>
@@ -2245,6 +2259,17 @@ case class Cast(
     }
   }
 
+  private[this] def castToGeographyCode(from: DataType): CastFunction = {
+    from match {
+      case _: GeographyType =>
+        (c, evPrim, _) =>
+          code"$evPrim = $c;"
+      case _: GeometryType =>
+        (c, evPrim, _) =>
+          code"$evPrim = 
org.apache.spark.sql.catalyst.util.STUtils.geometryToGeography($c);"
+    }
+  }
+
   private[this] def castToGeometryCode(from: DataType): CastFunction = {
     from match {
       case _: GeographyType =>
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index e18a489d36f3..e888432ef91e 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -1544,6 +1544,38 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
     }
   }
 
+  test("Casting GeometryType to GeographyType") {
+    // Casting from GEOMETRY to GEOGRAPHY is only allowed if the SRIDs are the 
same.
+
+    // Valid cast test cases.
+    val canAnsiCastTestCases: Seq[(DataType, DataType)] = Seq(
+      (GeometryType(4326), GeographyType(4326)),
+      (GeometryType("ANY"), GeographyType("ANY"))
+    )
+    // Iterate over the test cases and verify casting.
+    canAnsiCastTestCases.foreach { case (fromType, toType) =>
+      // Cast can be performed from `fromType` to `toType`.
+      assert(Cast.canCast(fromType, toType))
+      assert(Cast.canAnsiCast(fromType, toType))
+    }
+
+    // Invalid cast test cases.
+    val cannotAnsiCastTestCases: Seq[(DataType, DataType)] = Seq(
+      (GeometryType(0), GeographyType(4326)),
+      (GeometryType(3857), GeographyType(4326)),
+      (GeometryType("ANY"), GeographyType(4326)),
+      (GeometryType(0), GeographyType("ANY")),
+      (GeometryType(3857), GeographyType("ANY")),
+      (GeometryType(4326), GeographyType("ANY"))
+    )
+    // Iterate over the test cases and verify casting.
+    cannotAnsiCastTestCases.foreach { case (fromType, toType) =>
+      // Cast cannot be performed from `fromType` to `toType`.
+      assert(!Cast.canCast(fromType, toType))
+      assert(!Cast.canAnsiCast(fromType, toType))
+    }
+  }
+
   test("Casting GeometryType to GeometryType") {
     // Casting from fixed SRID GEOMETRY(<srid>) to mixed SRID GEOMETRY(ANY) is 
always allowed.
     // Type casting is always safe in this direction, so no additional 
constraints are imposed.
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
index e5e6d32ab99f..aa1b4735cf62 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
@@ -45,6 +45,7 @@ class STUtilsSuite {
   // A sample Geometry byte array for testing purposes, representing a POINT(1 
2) with SRID 0.
   private final int testGeometrySrid = 0;
   private final byte[] testGeometryBytes;
+  private final byte[] testGeometry4326Bytes;
 
   // Common constants used in geo value construction.
   private final ByteOrder end = Geo.DEFAULT_ENDIANNESS;
@@ -63,10 +64,39 @@ class STUtilsSuite {
     testGeometryBytes = new byte[sridLen + wkbLen];
     System.arraycopy(geomSrid, 0, testGeometryBytes, 0, sridLen);
     System.arraycopy(testWkb, 0, testGeometryBytes, sridLen, wkbLen);
+    testGeometry4326Bytes = new byte[sridLen + wkbLen];
+    System.arraycopy(geogSrid, 0, testGeometry4326Bytes, 0, sridLen);
+    System.arraycopy(testWkb, 0, testGeometry4326Bytes, sridLen, wkbLen);
   }
 
   /** Geospatial type casting utility methods. */
 
+  @Test
+  void testGeometryToGeography() {
+    GeometryVal geometryVal = GeometryVal.fromBytes(testGeometry4326Bytes);
+    GeographyVal geographyVal = STUtils.geometryToGeography(geometryVal);
+    assertNotNull(geographyVal);
+    assertArrayEquals(geometryVal.getBytes(), geographyVal.getBytes());
+    // Non-geographic SRID should not be allowed for geometry to geography 
casting.
+    SparkIllegalArgumentException sridException = assertThrows(
+      SparkIllegalArgumentException.class,
+      () -> 
STUtils.geometryToGeography(GeometryVal.fromBytes(testGeometryBytes)));
+    assertEquals("ST_INVALID_SRID_VALUE", sridException.getCondition());
+    // Coordinates outside geography bounds should not be allowed even with a 
valid SRID.
+    ByteBuffer oobWkbBuf = 
ByteBuffer.allocate(21).order(ByteOrder.LITTLE_ENDIAN);
+    oobWkbBuf.put((byte) 0x01).putInt(1).putDouble(200.0).putDouble(100.0);
+    // For example: POINT(200 100) geometry with SRID 4326.
+    byte[] oobWkb = oobWkbBuf.array();
+    byte[] oobGeomBytes = new byte[sridLen + oobWkb.length];
+    byte[] srid4326 = 
ByteBuffer.allocate(sridLen).order(end).putInt(testGeographySrid).array();
+    System.arraycopy(srid4326, 0, oobGeomBytes, 0, sridLen);
+    System.arraycopy(oobWkb, 0, oobGeomBytes, sridLen, oobWkb.length);
+    SparkIllegalArgumentException coordinateException = assertThrows(
+      SparkIllegalArgumentException.class,
+      () -> STUtils.geometryToGeography(GeometryVal.fromBytes(oobGeomBytes)));
+    assertEquals("WKB_PARSE_ERROR", coordinateException.getCondition());
+  }
+
   @Test
   void testGeographyToGeometry() {
     GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index 7566e69392fd..34969f43e1f7 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -308,6 +308,42 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT 
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
 4326) AS GEOGRAPHY(4326)))) AS result
+-- !query analysis
+Project 
[hex(st_asbinary(cast(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040,
 4326) as geography(4326)))) AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOGRAPHY(4326)) AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0) AS 
GEOGRAPHY(4326))\"",
+    "srcType" : "\"GEOMETRY(0)\"",
+    "targetType" : "\"GEOGRAPHY(4326)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 93,
+    "fragment" : 
"CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOGRAPHY(4326))"
+  } ]
+}
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'010100000000000000000069400000000000005940', 
4326) AS GEOGRAPHY(4326)) AS result
+-- !query analysis
+Project [cast(st_geomfromwkb(0x010100000000000000000069400000000000005940, 
4326) as geography(4326)) AS result#x]
++- OneRowRelation
+
+
 -- !query
 SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index f3543837ddfc..0db835bc7e85 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -58,6 +58,13 @@ SELECT 
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f000000000
 -- Casting GEOMETRY(ANY) to GEOMETRY(<srid>) is not allowed.
 SELECT 
CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOMETRY(ANY)
 AS GEOMETRY(4326)) AS result;
 
+-- Casting GEOMETRY to GEOGRAPHY is allowed only if SRIDs match.
+SELECT 
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
 4326) AS GEOGRAPHY(4326)))) AS result;
+-- Error handling: mismatched SRIDs.
+SELECT CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOGRAPHY(4326)) AS result;
+-- Error handling: coordinates out of geographic bounds.
+SELECT CAST(ST_GeomFromWKB(X'010100000000000000000069400000000000005940', 
4326) AS GEOGRAPHY(4326)) AS result;
+
 ---- Geospatial type coercion
 
 -- Array
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index 03f5728d7d3a..11ca99bc2304 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -345,6 +345,54 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT 
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
 4326) AS GEOGRAPHY(4326)))) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOGRAPHY(4326)) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0) AS 
GEOGRAPHY(4326))\"",
+    "srcType" : "\"GEOMETRY(0)\"",
+    "targetType" : "\"GEOGRAPHY(4326)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 93,
+    "fragment" : 
"CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOGRAPHY(4326))"
+  } ]
+}
+
+
+-- !query
+SELECT CAST(ST_GeomFromWKB(X'010100000000000000000069400000000000005940', 
4326) AS GEOGRAPHY(4326)) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkIllegalArgumentException
+{
+  "errorClass" : "WKB_PARSE_ERROR",
+  "sqlState" : "22023",
+  "messageParameters" : {
+    "parseError" : "Invalid coordinate value found",
+    "pos" : "5"
+  }
+}
+
+
 -- !query
 SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to