This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 56eaa8d2ac8c [SPARK-54079][GEO][SQL] Introduce the framework for
adding ST expressions in Catalyst
56eaa8d2ac8c is described below
commit 56eaa8d2ac8c7e253cdc6df341c54cf98262cbbd
Author: Uros Bojanic <[email protected]>
AuthorDate: Thu Oct 30 10:22:26 2025 +0800
[SPARK-54079][GEO][SQL] Introduce the framework for adding ST expressions
in Catalyst
### What changes were proposed in this pull request?
This PR implements rudimentary `WKB` read/write capabilities for
`Geography` and `Geometry`, introduces a minimal set of ST expressions for WKB
handling in Catalyst, and registers the new SQL functions under the new
`st_funcs` group.
Note that full standard geospatial format parsing support (as well as other
specialized ST expressions) will be properly implemented and fully tested in
the upcoming separate tasks.
### Why are the changes needed?
Establish a minimal ST expression framework, laying the groundwork for
proper WKB handling support, and the foundations for expanding spatial function
support in the near future.
### Does this PR introduce _any_ user-facing change?
Yes, this PR introduces 3 new Catalyst expressions and corresponding SQL
functions: `ST_AsBinary`, `ST_GeogFromWKB`, `ST_GeomFromWKB`.
### How was this patch tested?
Added new Java and Scala unit test suites:
- `StUtilsSuite.java`
- `STExpressionsSuite.scala`
Added a new E2E SQL test file:
- `st-functions.sql`
Added appropriate test cases to:
- `GeographyExecutionSuite.java`
- `GeometryExecutionSuite.java`
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52784 from uros-db/geo-st-expressions.
Lead-authored-by: Uros Bojanic <[email protected]>
Co-authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/expressions/ExpressionInfo.java | 2 +-
.../apache/spark/sql/catalyst/util/Geography.java | 5 +-
.../apache/spark/sql/catalyst/util/Geometry.java | 5 +-
.../apache/spark/sql/catalyst/util/STUtils.java | 70 +++++++++
.../sql/catalyst/analysis/FunctionRegistry.scala | 5 +
.../spark/sql/catalyst/expressions/literals.scala | 2 +
.../catalyst/expressions/st/stExpressions.scala | 174 +++++++++++++++++++++
.../sql/catalyst/util/GeographyExecutionSuite.java | 26 +--
.../sql/catalyst/util/GeometryExecutionSuite.java | 26 +--
.../spark/sql/catalyst/util/StUtilsSuite.java | 93 +++++++++++
.../sql-functions/sql-expression-schema.md | 3 +
.../analyzer-results/nonansi/st-functions.sql.out | 13 ++
.../analyzer-results/st-functions.sql.out | 13 ++
.../sql-tests/inputs/nonansi/st-functions.sql | 1 +
.../resources/sql-tests/inputs/st-functions.sql | 5 +
.../sql-tests/results/nonansi/st-functions.sql.out | 15 ++
.../sql-tests/results/st-functions.sql.out | 15 ++
.../org/apache/spark/sql/STExpressionsSuite.scala | 51 ++++++
.../sql/expressions/ExpressionInfoSuite.scala | 2 +-
19 files changed, 498 insertions(+), 28 deletions(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
index 310d18ddb348..dd56c650c073 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
@@ -48,7 +48,7 @@ public class ExpressionInfo {
"collection_funcs", "predicate_funcs", "conditional_funcs",
"conversion_funcs",
"csv_funcs", "datetime_funcs", "generator_funcs", "hash_funcs",
"json_funcs",
"lambda_funcs", "map_funcs", "math_funcs", "misc_funcs",
"string_funcs", "struct_funcs",
- "window_funcs", "xml_funcs", "table_funcs", "url_funcs",
"variant_funcs"));
+ "window_funcs", "xml_funcs", "table_funcs", "url_funcs",
"variant_funcs", "st_funcs"));
private static final Set<String> validSources =
new HashSet<>(Arrays.asList("built-in", "hive", "python_udf",
"scala_udf", "sql_udf",
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
index f7b0df8990d3..c46c2368832f 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
@@ -77,7 +77,10 @@ public final class Geography implements Geo {
// Returns a Geography object with the specified SRID value by parsing the
input WKB.
public static Geography fromWkb(byte[] wkb, int srid) {
- throw new UnsupportedOperationException("Geography WKB parsing is not yet
supported.");
+ byte[] bytes = new byte[HEADER_SIZE + wkb.length];
+ ByteBuffer.wrap(bytes).order(DEFAULT_ENDIANNESS).putInt(srid);
+ System.arraycopy(wkb, 0, bytes, WKB_OFFSET, wkb.length);
+ return fromBytes(bytes);
}
// Overload for the WKB reader where we use the default SRID for Geography.
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
index 81cdaeb97ce2..c4b6e5d0e4bd 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
@@ -77,7 +77,10 @@ public final class Geometry implements Geo {
// Returns a Geometry object with the specified SRID value by parsing the
input WKB.
public static Geometry fromWkb(byte[] wkb, int srid) {
- throw new UnsupportedOperationException("Geometry WKB parsing is not yet
supported.");
+ byte[] bytes = new byte[HEADER_SIZE + wkb.length];
+ ByteBuffer.wrap(bytes).order(DEFAULT_ENDIANNESS).putInt(srid);
+ System.arraycopy(wkb, 0, bytes, WKB_OFFSET, wkb.length);
+ return fromBytes(bytes);
}
// Overload for the WKB reader where we use the default SRID for Geometry.
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
new file mode 100644
index 000000000000..641382a0f959
--- /dev/null
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.util;
+
+import org.apache.spark.unsafe.types.GeographyVal;
+import org.apache.spark.unsafe.types.GeometryVal;
+
+// This class defines static methods that are used to implement ST expressions
using `StaticInvoke`.
+public final class STUtils {
+
+ /** Conversion methods from physical values to Geography/Geometry objects. */
+
+ // Converts a GEOGRAPHY from its physical value to the corresponding
`Geography` object
+ static Geography fromPhysVal(GeographyVal value) {
+ return Geography.fromBytes(value.getBytes());
+ }
+
+ // Converts a GEOMETRY from its physical value to the corresponding
`Geometry` object
+ static Geometry fromPhysVal(GeometryVal value) {
+ return Geometry.fromBytes(value.getBytes());
+ }
+
+ /** Conversion methods from Geography/Geometry objects to physical values. */
+
+ // Converts a `Geography` object to the corresponding GEOGRAPHY physical
value.
+ static GeographyVal toPhysVal(Geography g) {
+ return g.getValue();
+ }
+
+ // Converts a `Geometry` object to the corresponding GEOMETRY physical value.
+ static GeometryVal toPhysVal(Geometry g) {
+ return g.getValue();
+ }
+
+ /** Methods for implementing ST expressions. */
+
+ // ST_AsBinary
+ public static byte[] stAsBinary(GeographyVal geo) {
+ return fromPhysVal(geo).toWkb();
+ }
+
+ public static byte[] stAsBinary(GeometryVal geo) {
+ return fromPhysVal(geo).toWkb();
+ }
+
+ // ST_GeogFromWKB
+ public static GeographyVal stGeogFromWKB(byte[] wkb) {
+ return toPhysVal(Geography.fromWkb(wkb));
+ }
+
+ // ST_GeomFromWKB
+ public static GeometryVal stGeomFromWKB(byte[] wkb) {
+ return toPhysVal(Geometry.fromWkb(wkb));
+ }
+
+}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 0a596a8bd63e..97f8cbc23b7a 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -873,6 +873,11 @@ object FunctionRegistry {
expression[SchemaOfVariantAgg]("schema_of_variant_agg"),
expression[ToVariantObject]("to_variant_object"),
+ // Spatial
+ expression[ST_AsBinary]("st_asbinary"),
+ expression[ST_GeogFromWKB]("st_geogfromwkb"),
+ expression[ST_GeomFromWKB]("st_geomfromwkb"),
+
// cast
expression[Cast]("cast"),
// Cast aliases (SPARK-16730)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index c799415dfc70..710bd671b29e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -252,6 +252,8 @@ object Literal {
case PhysicalNullType => true
case PhysicalShortType => v.isInstanceOf[Short]
case _: PhysicalStringType => v.isInstanceOf[UTF8String]
+ case _: PhysicalGeographyType => v.isInstanceOf[GeographyVal]
+ case _: PhysicalGeometryType => v.isInstanceOf[GeometryVal]
case PhysicalVariantType => v.isInstanceOf[VariantVal]
case st: PhysicalStructType =>
v.isInstanceOf[InternalRow] && {
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
new file mode 100755
index 000000000000..12f3d13746d5
--- /dev/null
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/st/stExpressions.scala
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.sql.catalyst.expressions.objects._
+import org.apache.spark.sql.catalyst.trees._
+import org.apache.spark.sql.catalyst.util.{Geography, Geometry, STUtils}
+import org.apache.spark.sql.types._
+
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// This file defines expressions for geospatial operations.
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// Useful constants for ST expressions.
+private[sql] object ExpressionDefaults {
+ val DEFAULT_GEOGRAPHY_SRID: Int = Geography.DEFAULT_SRID
+ val DEFAULT_GEOMETRY_SRID: Int = Geometry.DEFAULT_SRID
+}
+
+/** ST writer expressions. */
+
+/**
+ * Returns the input GEOGRAPHY or GEOMETRY value in WKB format.
+ * See
https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+ * for more details on the WKB format.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(geo) - Returns the geospatial value (value of type GEOGRAPHY
or GEOMETRY) "
+ + "in WKB format.",
+ arguments = """
+ Arguments:
+ * geo - A geospatial value, either a GEOGRAPHY or a GEOMETRY.
+ """,
+ examples = """
+ Examples:
+ > SELECT
hex(_FUNC_(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
+ > SELECT
hex(_FUNC_(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
+ """,
+ since = "4.1.0",
+ group = "st_funcs"
+)
+case class ST_AsBinary(geo: Expression)
+ extends RuntimeReplaceable
+ with ImplicitCastInputTypes
+ with UnaryLike[Expression] {
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(
+ TypeCollection(GeographyType, GeometryType)
+ )
+
+ override lazy val replacement: Expression = StaticInvoke(
+ classOf[STUtils],
+ BinaryType,
+ "stAsBinary",
+ Seq(geo),
+ returnNullable = false
+ )
+
+ override def prettyName: String = "st_asbinary"
+
+ override def child: Expression = geo
+
+ override protected def withNewChildInternal(newChild: Expression):
ST_AsBinary =
+ copy(geo = newChild)
+}
+
+/** ST reader expressions. */
+
+/**
+ * Parses the WKB description of a geography and returns the corresponding
GEOGRAPHY value. The SRID
+ * value of the returned GEOGRAPHY value is 4326.
+ * See
https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+ * for more details on the WKB format.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(wkb) - Parses the WKB description of a geography and returns
the corresponding "
+ + "GEOGRAPHY value.",
+ arguments = """
+ Arguments:
+ * wkb - A BINARY value in WKB format, representing a GEOGRAPHY value.
+ """,
+ examples = """
+ Examples:
+ > SELECT
hex(st_asbinary(_FUNC_(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
+ """,
+ since = "4.1.0",
+ group = "st_funcs"
+)
+case class ST_GeogFromWKB(wkb: Expression)
+ extends RuntimeReplaceable
+ with ImplicitCastInputTypes
+ with UnaryLike[Expression] {
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
+
+ override lazy val replacement: Expression = StaticInvoke(
+ classOf[STUtils],
+ GeographyType(ExpressionDefaults.DEFAULT_GEOGRAPHY_SRID),
+ "stGeogFromWKB",
+ Seq(wkb),
+ returnNullable = false
+ )
+
+ override def prettyName: String = "st_geogfromwkb"
+
+ override def child: Expression = wkb
+
+ override protected def withNewChildInternal(newChild: Expression):
ST_GeogFromWKB =
+ copy(wkb = newChild)
+}
+
+/**
+ * Parses the WKB description of a geometry and returns the corresponding
GEOMETRY value. The SRID
+ * value of the returned GEOMETRY value is 0.
+ * See
https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary
+ * for more details on the WKB format.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(wkb) - Parses the WKB description of a geometry and returns
the corresponding "
+ + "GEOMETRY value.",
+ arguments = """
+ Arguments:
+ * wkb - A BINARY value in WKB format, representing a GEOMETRY value.
+ """,
+ examples = """
+ Examples:
+ > SELECT
hex(st_asbinary(_FUNC_(X'0101000000000000000000F03F0000000000000040')));
+ 0101000000000000000000F03F0000000000000040
+ """,
+ since = "4.1.0",
+ group = "st_funcs"
+)
+case class ST_GeomFromWKB(wkb: Expression)
+ extends RuntimeReplaceable
+ with ImplicitCastInputTypes
+ with UnaryLike[Expression] {
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
+
+ override lazy val replacement: Expression = StaticInvoke(
+ classOf[STUtils],
+ GeometryType(ExpressionDefaults.DEFAULT_GEOMETRY_SRID),
+ "stGeomFromWKB",
+ Seq(wkb),
+ returnNullable = false
+ )
+
+ override def prettyName: String = "st_geomfromwkb"
+
+ override def child: Expression = wkb
+
+ override protected def withNewChildInternal(newChild: Expression):
ST_GeomFromWKB =
+ copy(wkb = newChild)
+}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
index de1f4d916d2e..f7a0a1929bc1 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
@@ -83,23 +83,25 @@ class GeographyExecutionTest {
/** Tests for Geography WKB parsing. */
@Test
- void testFromWkbWithSridUnsupported() {
+ void testFromWkbWithSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geography.fromWkb(wkb, 0)
- );
- assertEquals("Geography WKB parsing is not yet supported.",
exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is
not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be
updated accordingly.
+ Geography geography = Geography.fromWkb(wkb, 4326);
+ assertNotNull(geography);
+ assertArrayEquals(wkb, geography.toWkb());
+ assertEquals(4326, geography.srid());
}
@Test
- void testFromWkbNoSridUnsupported() {
+ void testFromWkbNoSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geography.fromWkb(wkb)
- );
- assertEquals("Geography WKB parsing is not yet supported.",
exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is
not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be
updated accordingly.
+ Geography geography = Geography.fromWkb(wkb);
+ assertNotNull(geography);
+ assertArrayEquals(wkb, geography.toWkb());
+ assertEquals(4326, geography.srid());
}
/** Tests for Geography EWKB parsing. */
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
index 17950f9cad0d..be43596b7f5a 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
@@ -83,23 +83,25 @@ class GeometryExecutionTest {
/** Tests for Geometry WKB parsing. */
@Test
- void testFromWkbWithSridUnsupported() {
+ void testFromWkbWithSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geometry.fromWkb(wkb, 0)
- );
- assertEquals("Geometry WKB parsing is not yet supported.",
exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is
not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be
updated accordingly.
+ Geometry geometry = Geometry.fromWkb(wkb, 4326);
+ assertNotNull(geometry);
+ assertArrayEquals(wkb, geometry.toWkb());
+ assertEquals(4326, geometry.srid());
}
@Test
- void testFromWkbNoSridUnsupported() {
+ void testFromWkbNoSridRudimentary() {
byte[] wkb = new byte[]{1, 2, 3};
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- () -> Geometry.fromWkb(wkb)
- );
- assertEquals("Geometry WKB parsing is not yet supported.",
exception.getMessage());
+ // Note: This is a rudimentary WKB handling test; actual WKB parsing is
not yet implemented.
+ // Once we implement the appropriate parsing logic, this test should be
updated accordingly.
+ Geometry geometry = Geometry.fromWkb(wkb);
+ assertNotNull(geometry);
+ assertArrayEquals(wkb, geometry.toWkb());
+ assertEquals(0, geometry.srid());
}
/** Tests for Geometry EWKB parsing. */
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
new file mode 100644
index 000000000000..425abac2efed
--- /dev/null
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util;
+
+import org.apache.spark.unsafe.types.GeographyVal;
+import org.apache.spark.unsafe.types.GeometryVal;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+
+/**
+ * Test suite for the ST expression utility class.
+ */
+class STUtilsSuite {
+
+ /** Common test data used across multiple tests below. */
+
+ private final byte[] testWkb = new byte[] {0x01, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, (byte)0xF0, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x40};
+
+ // A sample Geography byte array for testing purposes, representing a
POINT(1 2) with SRID 4326.
+ private final byte[] testGeographySrid = new byte[] {(byte)0xE6, 0x10, 0x00,
0x00};
+ private final byte[] testGeographyBytes;
+
+ // A sample Geometry byte array for testing purposes, representing a POINT(1
2) with SRID 0.
+ private final byte[] testGeometrySrid = new byte[] {0x00, 0x00, 0x00, 0x00};
+ private final byte[] testGeometryBytes;
+
+ {
+ int sridLen = testGeographySrid.length;
+ int wkbLen = testWkb.length;
+ // Initialize GEOGRAPHY.
+ testGeographyBytes = new byte[sridLen + wkbLen];
+ System.arraycopy(testGeographySrid, 0, testGeographyBytes, 0, sridLen);
+ System.arraycopy(testWkb, 0, testGeographyBytes, sridLen, wkbLen);
+ // Initialize GEOMETRY.
+ testGeometryBytes = new byte[sridLen + wkbLen];
+ System.arraycopy(testGeometrySrid, 0, testGeometryBytes, 0, sridLen);
+ System.arraycopy(testWkb, 0, testGeometryBytes, sridLen, wkbLen);
+ }
+
+ /** Tests for ST expression utility methods. */
+
+ // ST_AsBinary
+ @Test
+ void testStAsBinaryGeography() {
+ GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
+ byte[] geographyWkb = STUtils.stAsBinary(geographyVal);
+ assertNotNull(geographyWkb);
+ assertArrayEquals(testWkb, geographyWkb);
+ }
+
+ @Test
+ void testStAsBinaryGeometry() {
+ GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
+ byte[] geometryWkb = STUtils.stAsBinary(geometryVal);
+ assertNotNull(geometryWkb);
+ assertArrayEquals(testWkb, geometryWkb);
+ }
+
+ // ST_GeogFromWKB
+ @Test
+ void testStGeogFromWKB() {
+ GeographyVal geographyVal = STUtils.stGeogFromWKB(testWkb);
+ assertNotNull(geographyVal);
+ assertArrayEquals(testGeographyBytes, geographyVal.getBytes());
+ }
+
+ // ST_GeomFromWKB
+ @Test
+ void testStGeomFromWKB() {
+ GeometryVal geometryVal = STUtils.stGeomFromWKB(testWkb);
+ assertNotNull(geometryVal);
+ assertArrayEquals(testGeometryBytes, geometryVal.getBytes());
+ }
+
+}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index f192a020f576..56146237a98b 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -287,6 +287,9 @@
| org.apache.spark.sql.catalyst.expressions.Rint | rint | SELECT rint(12.3456)
| struct<rint(12.3456):double> |
| org.apache.spark.sql.catalyst.expressions.Round | round | SELECT round(2.5,
0) | struct<round(2.5, 0):decimal(2,0)> |
| org.apache.spark.sql.catalyst.expressions.RowNumber | row_number | SELECT a,
b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1',
1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,row_number() OVER
(PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND
CURRENT ROW):int> |
+| org.apache.spark.sql.catalyst.expressions.ST_AsBinary | st_asbinary | SELECT
hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')))
|
struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))):string>
|
+| org.apache.spark.sql.catalyst.expressions.ST_GeogFromWKB | st_geogfromwkb |
SELECT
hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')))
|
struct<hex(st_asbinary(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))):string>
|
+| org.apache.spark.sql.catalyst.expressions.ST_GeomFromWKB | st_geomfromwkb |
SELECT
hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040')))
|
struct<hex(st_asbinary(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040'))):string>
|
| org.apache.spark.sql.catalyst.expressions.SchemaOfCsv | schema_of_csv |
SELECT schema_of_csv('1,abc') | struct<schema_of_csv(1,abc):string> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json |
SELECT schema_of_json('[{"col":0}]') |
struct<schema_of_json([{"col":0}]):string> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfXml | schema_of_xml |
SELECT schema_of_xml('<p><a>1</a></p>') |
struct<schema_of_xml(<p><a>1</a></p>):string> |
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
new file mode 100644
index 000000000000..465f27b3c06f
--- /dev/null
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
@@ -0,0 +1,13 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query analysis
+Project
[hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040)))
AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query analysis
+Project
[hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040)))
AS result#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
new file mode 100644
index 000000000000..465f27b3c06f
--- /dev/null
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -0,0 +1,13 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query analysis
+Project
[hex(st_asbinary(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040)))
AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query analysis
+Project
[hex(st_asbinary(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040)))
AS result#x]
++- OneRowRelation
diff --git
a/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql
new file mode 100644
index 000000000000..720ce3767dc4
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/nonansi/st-functions.sql
@@ -0,0 +1 @@
+--IMPORT st-functions.sql
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
new file mode 100644
index 000000000000..02bb526bee25
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -0,0 +1,5 @@
+---- ST reader/writer expressions
+
+-- WKB (Well-Known Binary) round-trip tests for GEOGRAPHY and GEOMETRY types.
+SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result;
+SELECT
hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result;
diff --git
a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
new file mode 100644
index 000000000000..b77c11f0dd1d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
@@ -0,0 +1,15 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
new file mode 100644
index 000000000000..b77c11f0dd1d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -0,0 +1,15 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT
hex(ST_AsBinary(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
new file mode 100644
index 000000000000..46c59bbd6d0c
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+
+class STExpressionsSuite
+ extends QueryTest
+ with SharedSparkSession
+ with ExpressionEvalHelper {
+
+ // Private common constants used across several tests.
+ private val defaultGeographyType: DataType =
+ GeographyType(ExpressionDefaults.DEFAULT_GEOGRAPHY_SRID)
+ private val defaultGeometryType: DataType =
+ GeometryType(ExpressionDefaults.DEFAULT_GEOMETRY_SRID)
+
+ /** ST reader/writer expressions. */
+
+ test("ST_AsBinary") {
+ // Test data: WKB representation of POINT(1 2).
+ val wkb =
Hex.unhex("0101000000000000000000F03F0000000000000040".getBytes())
+ val wkbLiteral = Literal.create(wkb, BinaryType)
+ // ST_GeogFromWKB and ST_AsBinary.
+ val geographyExpression = ST_GeogFromWKB(wkbLiteral)
+ assert(geographyExpression.dataType.sameType(defaultGeographyType))
+ checkEvaluation(ST_AsBinary(geographyExpression), wkb)
+ // ST_GeomFromWKB and ST_AsBinary.
+ val geometryExpression = ST_GeomFromWKB(wkbLiteral)
+ assert(geometryExpression.dataType.sameType(defaultGeometryType))
+ checkEvaluation(ST_AsBinary(geometryExpression), wkb)
+ }
+
+}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
index e90907b904bd..33f128409f7e 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
@@ -60,7 +60,7 @@ class ExpressionInfoSuite extends SparkFunSuite with
SharedSparkSession {
"predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs",
"datetime_funcs",
"generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs",
"map_funcs", "math_funcs",
"misc_funcs", "string_funcs", "struct_funcs", "window_funcs",
"xml_funcs", "table_funcs",
- "url_funcs", "variant_funcs").sorted
+ "url_funcs", "variant_funcs", "st_funcs").sorted
val invalidGroupName = "invalid_group_funcs"
checkError(
exception = intercept[SparkIllegalArgumentException] {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]