This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 610bb52242c7 [SPARK-54104][GEO][SQL] Disallow casting geospatial types 
to/from other data types
610bb52242c7 is described below

commit 610bb52242c764e70aaba2f58f41338cda2299cd
Author: Uros Bojanic <[email protected]>
AuthorDate: Fri Oct 31 23:31:38 2025 +0800

    [SPARK-54104][GEO][SQL] Disallow casting geospatial types to/from other 
data types
    
    ### What changes were proposed in this pull request?
    This PR disallows casting `GEOGRAPHY` and `GEOMETRY` to/from other data 
types in the system (such as `BOOLEAN`, `BINARY`, `STRING`, and others). Note 
that these types were recently introduced as part of: 
https://github.com/apache/spark/pull/52491.
    
    ### Why are the changes needed?
    `GeographyType` and `GeometryType` are not interoperable with any 
non-geospatial data type, so we need to forbid such casts explicitly (they are 
now rejected at analysis time) in order to avoid unexpected failures.
    
    ### Does this PR introduce _any_ user-facing change?
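    Yes, casting the newly introduced geospatial types to/from non-geospatial 
types is now disallowed; such casts fail analysis with 
`DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION`.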
    
    ### How was this patch tested?
    Added new unit tests to verify the expected behaviour:
    - `CastWithAnsiOffSuite`
    - `CastWithAnsiOnSuite`
    
    Added corresponding end-to-end SQL tests:
    - `st-functions`
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52806 from uros-db/geo-cast-disallow.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../spark/sql/catalyst/expressions/Cast.scala      |  7 ++++
 .../expressions/st/STExpressionUtils.scala         | 32 +++++++++++++++
 .../sql/catalyst/expressions/CastSuiteBase.scala   | 35 ++++++++++++++++
 .../analyzer-results/nonansi/st-functions.sql.out  | 44 ++++++++++++++++++++
 .../analyzer-results/st-functions.sql.out          | 44 ++++++++++++++++++++
 .../resources/sql-tests/inputs/st-functions.sql    |  6 +++
 .../sql-tests/results/nonansi/st-functions.sql.out | 48 ++++++++++++++++++++++
 .../sql-tests/results/st-functions.sql.out         | 48 ++++++++++++++++++++++
 8 files changed, 264 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 23610f97dbbd..5b76c7d225e1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
+import 
org.apache.spark.sql.catalyst.expressions.st.STExpressionUtils.isGeoSpatialType
 import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 import org.apache.spark.sql.catalyst.trees.TreePattern._
 import org.apache.spark.sql.catalyst.types.{PhysicalFractionalType, 
PhysicalIntegralType, PhysicalNumericType}
@@ -93,6 +94,9 @@ object Cast extends QueryErrorsBase {
 
     case (NullType, _) => true
 
+    // Geospatial types cannot be cast to/from other data types.
+    case (fromType, toType) if isGeoSpatialType(fromType) != 
isGeoSpatialType(toType) => false
+
     case (_, _: StringType) => true
 
     case (_: StringType, _: BinaryType) => true
@@ -208,6 +212,9 @@ object Cast extends QueryErrorsBase {
 
     case (NullType, _) => true
 
+    // Geospatial types cannot be cast to/from other data types.
+    case (fromType, toType) if isGeoSpatialType(fromType) != 
isGeoSpatialType(toType) => false
+
     case (_, _: StringType) => true
 
     case (_: StringType, BinaryType) => true
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/STExpressionUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/STExpressionUtils.scala
new file mode 100755
index 000000000000..055173ec39ad
--- /dev/null
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/STExpressionUtils.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions.st
+
+import org.apache.spark.sql.types._
+
+private[sql] object STExpressionUtils {
+
+  /**
+   * Checks if the given data type is a geospatial type (i.e. GeometryType or 
GeographyType).
+   */
+  def isGeoSpatialType(dt: DataType): Boolean = dt match {
+    case _: GeometryType | _: GeographyType => true
+    case _ => false
+  }
+
+}
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index e997cd6f5257..7a87c86b63c0 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -1454,6 +1454,41 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
       (child: Expression) => Cast(child, StringType), TimeType())
   }
 
+  test("Casting geospatial data types to/from other data types") {
+    val geoTypes: Seq[DataType] = Seq(
+      GeographyType(4326),
+      GeographyType("ANY"),
+      GeometryType(0),
+      GeometryType(3857),
+      GeometryType(4326),
+      GeometryType("ANY")
+    )
+    val otherTypes: Seq[DataType] = Seq(
+      BinaryType,
+      BooleanType,
+      ByteType,
+      StringType,
+      StringType("UTF8_LCASE"),
+      StringType("UNICODE_CI"),
+      ShortType,
+      IntegerType,
+      LongType,
+      FloatType,
+      DoubleType
+    )
+    // Iterate over the test cases and verify casting.
+    geoTypes.foreach { geoType =>
+      otherTypes.foreach { otherType =>
+        // Cast cannot be performed from `geoType` to `otherType`.
+        assert(!Cast.canCast(geoType, otherType))
+        assert(!Cast.canAnsiCast(geoType, otherType))
+        // Cast cannot be performed from `otherType` to `geoType`.
+        assert(!Cast.canCast(otherType, geoType))
+        assert(!Cast.canAnsiCast(otherType, geoType))
+      }
+    }
+  }
+
   test("cast string to time") {
     checkEvaluation(cast(Literal.create("0:0:0"), TimeType()), 0L)
     checkEvaluation(cast(Literal.create(" 01:2:3.01   "), TimeType(2)), 
localTime(1, 2, 3, 10000))
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
index fe15f3b3a9a9..fe2dda3f1967 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
@@ -22,6 +22,50 @@ InsertIntoHadoopFsRelationCommand file:[not included in 
comparison]/{warehouse_d
    +- LocalRelation [col1#x]
 
 
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
STRING) AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
STRING)\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"STRING\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 84,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS STRING)"
+  } ]
+}
+
+
+-- !query
+SELECT CAST(X'0101000000000000000000f03f0000000000000040' AS GEOMETRY(4326)) 
AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : "\"CAST(X'0101000000000000000000F03F0000000000000040' AS 
GEOMETRY(4326))\"",
+    "srcType" : "\"BINARY\"",
+    "targetType" : "\"GEOMETRY(4326)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 76,
+    "fragment" : "CAST(X'0101000000000000000000f03f0000000000000040' AS 
GEOMETRY(4326))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query analysis
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index fe15f3b3a9a9..fe2dda3f1967 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -22,6 +22,50 @@ InsertIntoHadoopFsRelationCommand file:[not included in 
comparison]/{warehouse_d
    +- LocalRelation [col1#x]
 
 
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
STRING) AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
STRING)\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"STRING\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 84,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS STRING)"
+  } ]
+}
+
+
+-- !query
+SELECT CAST(X'0101000000000000000000f03f0000000000000040' AS GEOMETRY(4326)) 
AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : "\"CAST(X'0101000000000000000000F03F0000000000000040' AS 
GEOMETRY(4326))\"",
+    "srcType" : "\"BINARY\"",
+    "targetType" : "\"GEOMETRY(4326)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 76,
+    "fragment" : "CAST(X'0101000000000000000000f03f0000000000000040' AS 
GEOMETRY(4326))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index ecffe55231e7..dc688e4a8994 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -7,6 +7,12 @@ INSERT INTO geodata VALUES
 (NULL),
 (X'0101000000000000000000F03F0000000000000040');
 
+--- Casting geospatial data types
+
+-- GEOGRAPHY and GEOMETRY data types cannot be cast to/from other data types.
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
STRING) AS result;
+SELECT CAST(X'0101000000000000000000f03f0000000000000040' AS GEOMETRY(4326)) 
AS result;
+
 ---- ST reader/writer expressions
 
 -- WKB (Well-Known Binary) round-trip tests for GEOGRAPHY and GEOMETRY types.
diff --git 
a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
index 5787d02e29c2..e75d4ba419e2 100644
--- a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
@@ -25,6 +25,54 @@ struct<>
 
 
 
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
STRING) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
STRING)\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"STRING\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 84,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS STRING)"
+  } ]
+}
+
+
+-- !query
+SELECT CAST(X'0101000000000000000000f03f0000000000000040' AS GEOMETRY(4326)) 
AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : "\"CAST(X'0101000000000000000000F03F0000000000000040' AS 
GEOMETRY(4326))\"",
+    "srcType" : "\"BINARY\"",
+    "targetType" : "\"GEOMETRY(4326)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 76,
+    "fragment" : "CAST(X'0101000000000000000000f03f0000000000000040' AS 
GEOMETRY(4326))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index 5787d02e29c2..e75d4ba419e2 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -25,6 +25,54 @@ struct<>
 
 
 
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
STRING) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
STRING)\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"STRING\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 84,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS STRING)"
+  } ]
+}
+
+
+-- !query
+SELECT CAST(X'0101000000000000000000f03f0000000000000040' AS GEOMETRY(4326)) 
AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : "\"CAST(X'0101000000000000000000F03F0000000000000040' AS 
GEOMETRY(4326))\"",
+    "srcType" : "\"BINARY\"",
+    "targetType" : "\"GEOMETRY(4326)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 76,
+    "fragment" : "CAST(X'0101000000000000000000f03f0000000000000040' AS 
GEOMETRY(4326))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to