This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 11419bb2cff7 [SPARK-55530][GEO][SQL] Support Geo result sets in Hive
and Thrift server
11419bb2cff7 is described below
commit 11419bb2cff7cd4beeecf1a3918bd4da2aaba507
Author: Uros Bojanic <[email protected]>
AuthorDate: Mon Feb 23 00:20:10 2026 +0800
[SPARK-55530][GEO][SQL] Support Geo result sets in Hive and Thrift server
### What changes were proposed in this pull request?
Implement geospatial data output display in Extended Well-Known Text (EWKT)
format and add the appropriate geospatial type mapping in Hive Thrift Server.
### Why are the changes needed?
Display Geometry and Geography values in a human-readable format, which
allows projecting geospatial data in Spark SQL and Thrift Server.
### Does this PR introduce _any_ user-facing change?
Yes, geospatial data is now displayed using EWKT.
### How was this patch tested?
- Unit tests for WKT/EWKT writing from geo catalyst classes and STUtils
class.
- HiveResult unit tests for standalone and nested geometry/geography
formatting.
- SparkExecuteStatementOperation unit tests for thrift server type mapping.
- RandomDataGenerator unit test for generating random geospatial data.
- End-to-end SQL golden file tests for geospatial result set display.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54325 from uros-db/geo-thrift-server.
Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/catalyst/util/Geography.java | 15 +-
.../apache/spark/sql/catalyst/util/Geometry.java | 18 ++-
.../apache/spark/sql/catalyst/util/STUtils.java | 10 ++
.../org/apache/spark/sql/RandomDataGenerator.scala | 45 ++++++
.../spark/sql/RandomDataGeneratorSuite.scala | 16 ++
.../sql/catalyst/util/GeographyExecutionSuite.java | 19 +--
.../sql/catalyst/util/GeometryExecutionSuite.java | 36 +++--
.../spark/sql/catalyst/util/StUtilsSuite.java | 13 ++
.../apache/spark/sql/execution/HiveResult.scala | 10 +-
.../analyzer-results/st-functions.sql.out | 155 ++++++++++++++++++
.../resources/sql-tests/inputs/st-functions.sql | 30 ++++
.../sql-tests/results/st-functions.sql.out | 176 +++++++++++++++++++++
.../spark/sql/execution/HiveResultSuite.scala | 45 ++++++
.../spark/sql/hive/thriftserver/RowSetUtils.scala | 13 +-
.../SparkExecuteStatementOperation.scala | 2 +
.../SparkExecuteStatementOperationSuite.scala | 14 +-
16 files changed, 581 insertions(+), 36 deletions(-)
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
index 8831548bf1fc..445e30af15b8 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geography.java
@@ -23,6 +23,7 @@ import org.apache.spark.unsafe.types.GeographyVal;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
// Catalyst-internal server-side execution wrapper for GEOGRAPHY.
@@ -154,14 +155,20 @@ public final class Geography implements Geo {
@Override
public byte[] toWkt() {
- // Once WKT conversion is implemented, it should support various
precisions.
- throw new UnsupportedOperationException("Geography WKT conversion is not
yet supported.");
+ return toWktInternal().getBytes(StandardCharsets.UTF_8);
}
@Override
public byte[] toEwkt() {
- // Once EWKT conversion is implemented, it should support various
precisions.
- throw new UnsupportedOperationException("Geography EWKT conversion is not
yet supported.");
+ String ewkt = "SRID=" + srid() + ";" + toWktInternal();
+ return ewkt.getBytes(StandardCharsets.UTF_8);
+ }
+
+ private String toWktInternal() {
+ WkbReader reader = new WkbReader(true);
+ GeometryModel model = reader.read(Arrays.copyOfRange(
+ getBytes(), WKB_OFFSET, getBytes().length));
+ return model.toString();
}
/** Other instance methods, inherited from the `Geo` interface. */
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
index 8fb206d072bd..757c63f421e7 100644
---
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
+++
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/Geometry.java
@@ -23,6 +23,7 @@ import org.apache.spark.unsafe.types.GeometryVal;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
// Catalyst-internal server-side execution wrapper for GEOMETRY.
@@ -154,14 +155,23 @@ public final class Geometry implements Geo {
@Override
public byte[] toWkt() {
- // Once WKT conversion is implemented, it should support various
precisions.
- throw new UnsupportedOperationException("Geometry WKT conversion is not
yet supported.");
+ return toWktInternal().getBytes(StandardCharsets.UTF_8);
}
@Override
public byte[] toEwkt() {
- // Once EWKT conversion is implemented, it should support various
precisions.
- throw new UnsupportedOperationException("Geometry EWKT conversion is not
yet supported.");
+ if (srid() == DEFAULT_SRID) {
+ return toWkt();
+ }
+ String ewkt = "SRID=" + srid() + ";" + toWktInternal();
+ return ewkt.getBytes(StandardCharsets.UTF_8);
+ }
+
+ private String toWktInternal() {
+ WkbReader reader = new WkbReader();
+ GeometryModel model = reader.read(Arrays.copyOfRange(
+ getBytes(), WKB_OFFSET, getBytes().length));
+ return model.toString();
}
/** Other instance methods, inherited from the `Geo` interface. */
diff --git
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
index 78a54846b25d..c02192965fd7 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
@@ -21,6 +21,7 @@ import org.apache.spark.sql.types.GeographyType;
import org.apache.spark.sql.types.GeometryType;
import org.apache.spark.unsafe.types.GeographyVal;
import org.apache.spark.unsafe.types.GeometryVal;
+import org.apache.spark.unsafe.types.UTF8String;
// This class defines static methods that used to implement ST expressions
using `StaticInvoke`.
public final class STUtils {
@@ -101,6 +102,15 @@ public final class STUtils {
return fromPhysVal(geo).toWkb();
}
+ // ST_AsEWKT
+ public static UTF8String stAsEwkt(GeographyVal geo) {
+ return UTF8String.fromBytes(fromPhysVal(geo).toEwkt());
+ }
+
+ public static UTF8String stAsEwkt(GeometryVal geo) {
+ return UTF8String.fromBytes(fromPhysVal(geo).toEwkt());
+ }
+
// ST_GeogFromWKB
public static GeographyVal stGeogFromWKB(byte[] wkb) {
return toPhysVal(Geography.fromWkb(wkb));
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 655ff1e728dc..ad4b701c19cf 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -386,6 +386,51 @@ object RandomDataGenerator {
}
}
}
+ case gt: GeometryType =>
+ val possibleGeometriesWKB = Seq(
+ "010100000000000000000031400000000000001c40", // POINT (17 7)
+ "010100000000000000000014400000000000001440", // POINT (5 5)
+ "010100000000000000008057400000000000003340" // POINT (93.5 19)
+ )
+
+ val possibleSrids = Seq(4326, 3857, 0)
+
+ Some(() => {
+ val wkbIdx = rand.nextInt(possibleGeometriesWKB.length)
+ val sridIdx = rand.nextInt(possibleSrids.length)
+ val wkb = possibleGeometriesWKB(wkbIdx).grouped(2)
+ .map(Integer.parseInt(_, 16).toByte).toArray
+ val srid = if (gt.srid == GeometryType.MIXED_SRID) {
+ possibleSrids(sridIdx)
+ } else {
+ gt.srid
+ }
+
+ Geometry.fromWKB(wkb, srid)
+ })
+
+ case gt: GeographyType =>
+ val possibleGeometriesWKB = Seq(
+ "010100000000000000000031400000000000001c40", // POINT (17 7)
+ "010100000000000000000014400000000000001440", // POINT (5 5)
+ "010100000000000000008057400000000000003340" // POINT (93.5 19)
+ )
+
+ val possibleSrids = Seq(4326)
+
+ Some(() => {
+ val wkbIdx = rand.nextInt(possibleGeometriesWKB.length)
+ val sridIdx = rand.nextInt(possibleSrids.length)
+ val wkb = possibleGeometriesWKB(wkbIdx).grouped(2)
+ .map(Integer.parseInt(_, 16).toByte).toArray
+ val srid = if (gt.srid == GeographyType.MIXED_SRID) {
+ possibleSrids(sridIdx)
+ } else {
+ gt.srid
+ }
+
+ Geography.fromWKB(wkb, srid)
+ })
case unsupportedType => None
}
// Handle nullability by wrapping the non-null value generator:
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
index e2177307824b..37e60f850906 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala
@@ -145,6 +145,22 @@ class RandomDataGeneratorSuite extends SparkFunSuite with
SQLHelper {
assert(Arrays.equals(array2, arrayExpected))
}
+ // Geospatial types:
+ for (nullable <- Seq(true, false)) {
+ test(s"GeometryType with fixed SRID (nullable=$nullable)") {
+ testRandomDataGeneration(GeometryType(4326), nullable)
+ }
+ test(s"GeometryType with mixed SRID (nullable=$nullable)") {
+ testRandomDataGeneration(GeometryType("ANY"), nullable)
+ }
+ test(s"GeographyType with fixed SRID (nullable=$nullable)") {
+ testRandomDataGeneration(GeographyType(4326), nullable)
+ }
+ test(s"GeographyType with mixed SRID (nullable=$nullable)") {
+ testRandomDataGeneration(GeographyType("ANY"), nullable)
+ }
+ }
+
test("SPARK-35116: The generated data fits the precision of
DayTimeIntervalType in spark") {
(dayTimeIntervalTypes ++ yearMonthIntervalTypes).foreach { dt =>
for (seed <- 1 to 1000) {
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
index bd1ecf7e8c10..d7c40047977a 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeographyExecutionSuite.java
@@ -22,6 +22,7 @@ import org.junit.jupiter.api.Test;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
import java.util.HexFormat;
import static org.junit.jupiter.api.Assertions.*;
@@ -217,23 +218,17 @@ class GeographyExecutionSuite {
/** Tests for Geography WKT and EWKT converters. */
@Test
- void testToWktUnsupported() {
+ void testToWkt() {
+ // The test geography is POINT(1 2) with SRID 4326.
Geography geography = Geography.fromBytes(testGeographyVal);
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- geography::toWkt
- );
- assertEquals("Geography WKT conversion is not yet supported.",
exception.getMessage());
+ assertEquals("POINT(1 2)", new String(geography.toWkt(),
StandardCharsets.UTF_8));
}
@Test
- void testToEwktUnsupported() {
+ void testToEwkt() {
+ // The test geography is POINT(1 2) with SRID 4326.
Geography geography = Geography.fromBytes(testGeographyVal);
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- geography::toEwkt
- );
- assertEquals("Geography EWKT conversion is not yet supported.",
exception.getMessage());
+ assertEquals("SRID=4326;POINT(1 2)", new String(geography.toEwkt(),
StandardCharsets.UTF_8));
}
/** Tests for other Geography methods. */
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
index d28ba719f748..617ef7752a05 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/GeometryExecutionSuite.java
@@ -22,6 +22,7 @@ import org.junit.jupiter.api.Test;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
import java.util.HexFormat;
import static org.junit.jupiter.api.Assertions.*;
@@ -43,6 +44,17 @@ class GeometryExecutionSuite {
0x40
};
+ // The same Geometry as before, representing a POINT(1 2), but with SRID 0
(i.e. undefined SRID).
+ private final byte[] testGeometryValNoSrid = new byte[] {
+ 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, (byte)0xF0,
+ 0x3F, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x40
+ };
+
/** Tests for Geometry factory methods and getters. */
@Test
@@ -220,23 +232,23 @@ class GeometryExecutionSuite {
/** Tests for Geometry WKT and EWKT converters. */
@Test
- void testToWktUnsupported() {
+ void testToWkt() {
+ // The test geometry is POINT(1 2) with SRID 4326.
Geometry geometry = Geometry.fromBytes(testGeometryVal);
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- geometry::toWkt
- );
- assertEquals("Geometry WKT conversion is not yet supported.",
exception.getMessage());
+ assertEquals("POINT(1 2)", new String(geometry.toWkt(),
StandardCharsets.UTF_8));
+    // WKT output should be the same regardless of the Geometry SRID
value.
+ Geometry geometryNoSrid = Geometry.fromBytes(testGeometryValNoSrid);
+ assertEquals("POINT(1 2)", new String(geometryNoSrid.toWkt(),
StandardCharsets.UTF_8));
}
@Test
- void testToEwktUnsupported() {
+ void testToEwkt() {
+ // The test geometry is POINT(1 2) with SRID 4326.
Geometry geometry = Geometry.fromBytes(testGeometryVal);
- UnsupportedOperationException exception = assertThrows(
- UnsupportedOperationException.class,
- geometry::toEwkt
- );
- assertEquals("Geometry EWKT conversion is not yet supported.",
exception.getMessage());
+ assertEquals("SRID=4326;POINT(1 2)", new String(geometry.toEwkt(),
StandardCharsets.UTF_8));
+ // If the Geometry has SRID 0 (undefined SRID), then the EWKT output does
not include it.
+ Geometry geometryNoSrid = Geometry.fromBytes(testGeometryValNoSrid);
+ assertEquals("POINT(1 2)", new String(geometryNoSrid.toEwkt(),
StandardCharsets.UTF_8));
}
/** Tests for other Geometry methods. */
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
index 4b154f874974..e5e6d32ab99f 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
@@ -94,6 +94,19 @@ class STUtilsSuite {
assertArrayEquals(testWkb, geometryWkb);
}
+ // ST_AsEWKT
+ @Test
+ void testStAsEwktGeography() {
+ GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
+ assertEquals("SRID=4326;POINT(1 2)",
STUtils.stAsEwkt(geographyVal).toString());
+ }
+
+ @Test
+ void testStAsEwktGeometry() {
+ GeometryVal geometryVal = GeometryVal.fromBytes(testGeometryBytes);
+ assertEquals("POINT(1 2)", STUtils.stAsEwkt(geometryVal).toString());
+ }
+
// ST_GeogFromWKB
@Test
void testStGeogFromWKB() {
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
index 3a7b75a555af..8a666bbb9dad 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala
@@ -23,7 +23,7 @@ import java.time._
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.SQLConfHelper
import org.apache.spark.sql.catalyst.expressions.ToStringBase
-import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils,
FractionTimeFormatter, TimeFormatter, TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils,
FractionTimeFormatter, STUtils, TimeFormatter, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.HIVE_STYLE
import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros,
periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
import org.apache.spark.sql.execution.command.{DescribeCommandBase,
ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand}
@@ -149,6 +149,14 @@ object HiveResult extends SQLConfHelper {
startField,
endField)
case (v: VariantVal, VariantType) => v.toString
+ case (g: Geometry, dt: GeometryType) =>
+ val internalGeom = STUtils.serializeGeomFromWKB(g, dt)
+ val s = STUtils.stAsEwkt(internalGeom).toString
+ if (nested) "\"" + s + "\"" else s
+ case (g: Geography, dt: GeographyType) =>
+ val internalGeog = STUtils.serializeGeogFromWKB(g, dt)
+ val s = STUtils.stAsEwkt(internalGeog).toString
+ if (nested) "\"" + s + "\"" else s
case (other, u: UserDefinedType[_]) => u.stringifyValue(other)
}
}
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index 769c59c5d239..8391c4800d8b 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -22,6 +22,161 @@ InsertIntoHadoopFsRelationCommand file:[not included in
comparison]/{warehouse_d
+- LocalRelation [col1#x]
+-- !query
+SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')
+-- !query analysis
+Project [st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0) AS
st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040', 4326)
+-- !query analysis
+Project [st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 4326) AS
st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 4326)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT
ST_SetSrid(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'), 3857)
+-- !query analysis
+Project
[st_setsrid(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040, 0),
3857) AS
st_setsrid(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040', 0),
3857)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')
+-- !query analysis
+Project [st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) AS
st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOMETRY(4326))
+-- !query analysis
+Project [cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040) as
geometry(4326)) AS
CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS
GEOMETRY(4326))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_GeomFromWKB(NULL)
+-- !query analysis
+Project [st_geomfromwkb(cast(null as binary), 0) AS st_geomfromwkb(NULL, 0)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_GeomFromWKB(NULL, 4326)
+-- !query analysis
+Project [st_geomfromwkb(cast(null as binary), 4326) AS st_geomfromwkb(NULL,
4326)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_SetSrid(ST_GeomFromWKB(NULL), 3857)
+-- !query analysis
+Project [st_setsrid(st_geomfromwkb(cast(null as binary), 0), 3857) AS
st_setsrid(st_geomfromwkb(NULL, 0), 3857)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_GeogFromWKB(NULL)
+-- !query analysis
+Project [st_geogfromwkb(cast(null as binary)) AS st_geogfromwkb(NULL)#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(NULL) AS GEOMETRY(4326))
+-- !query analysis
+Project [cast(st_geogfromwkb(cast(null as binary)) as geometry(4326)) AS
CAST(st_geogfromwkb(NULL) AS GEOMETRY(4326))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))
+-- !query analysis
+Project [array(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040,
0)) AS array(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040',
0))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
4326))
+-- !query analysis
+Project [array(st_geomfromwkb(0x0101000000000000000000F03F0000000000000040,
4326)) AS array(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040',
4326))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT array(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))
+-- !query analysis
+Project [array(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040))
AS array(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'))#x]
++- OneRowRelation
+
+
+-- !query
+SELECT ST_GeomFromWKB(wkb) FROM geodata
+-- !query analysis
+Project [st_geomfromwkb(wkb#x, 0) AS st_geomfromwkb(wkb, 0)#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT ST_GeomFromWKB(wkb, 4326) FROM geodata
+-- !query analysis
+Project [st_geomfromwkb(wkb#x, 4326) AS st_geomfromwkb(wkb, 4326)#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT ST_SetSrid(ST_GeomFromWKB(wkb), 3857) FROM geodata
+-- !query analysis
+Project [st_setsrid(st_geomfromwkb(wkb#x, 0), 3857) AS
st_setsrid(st_geomfromwkb(wkb, 0), 3857)#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT ST_GeogFromWKB(wkb) FROM geodata
+-- !query analysis
+Project [st_geogfromwkb(wkb#x) AS st_geogfromwkb(wkb)#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(wkb) AS GEOMETRY(4326)) FROM geodata
+-- !query analysis
+Project [cast(st_geogfromwkb(wkb#x) as geometry(4326)) AS
CAST(st_geogfromwkb(wkb) AS GEOMETRY(4326))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(wkb)) FROM geodata
+-- !query analysis
+Project [array(st_geomfromwkb(wkb#x, 0)) AS array(st_geomfromwkb(wkb, 0))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(wkb, 4326)) FROM geodata
+-- !query analysis
+Project [array(st_geomfromwkb(wkb#x, 4326)) AS array(st_geomfromwkb(wkb,
4326))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT array(ST_GeogFromWKB(wkb)) FROM geodata
+-- !query analysis
+Project [array(st_geogfromwkb(wkb#x)) AS array(st_geogfromwkb(wkb))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
-- !query
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS
STRING) AS result
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index e379bd797a74..c12b79fb2869 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -7,6 +7,36 @@ INSERT INTO geodata VALUES
(NULL),
(X'0101000000000000000000F03F0000000000000040');
+-- Cast geography to geometry, then display.
+
+-- 1. Driver-level queries.
+SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040');
+SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040', 4326);
+SELECT
ST_SetSrid(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'), 3857);
+SELECT ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040');
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOMETRY(4326));
+-- NULL handling.
+SELECT ST_GeomFromWKB(NULL);
+SELECT ST_GeomFromWKB(NULL, 4326);
+SELECT ST_SetSrid(ST_GeomFromWKB(NULL), 3857);
+SELECT ST_GeogFromWKB(NULL);
+SELECT CAST(ST_GeogFromWKB(NULL) AS GEOMETRY(4326));
+-- Nested in array.
+SELECT array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'));
+SELECT array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
4326));
+SELECT array(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'));
+
+-- 2. Table-level queries.
+SELECT ST_GeomFromWKB(wkb) FROM geodata;
+SELECT ST_GeomFromWKB(wkb, 4326) FROM geodata;
+SELECT ST_SetSrid(ST_GeomFromWKB(wkb), 3857) FROM geodata;
+SELECT ST_GeogFromWKB(wkb) FROM geodata;
+SELECT CAST(ST_GeogFromWKB(wkb) AS GEOMETRY(4326)) FROM geodata;
+-- Nested in array.
+SELECT array(ST_GeomFromWKB(wkb)) FROM geodata;
+SELECT array(ST_GeomFromWKB(wkb, 4326)) FROM geodata;
+SELECT array(ST_GeogFromWKB(wkb)) FROM geodata;
+
--- Casting geospatial data types
-- GEOGRAPHY and GEOMETRY data types cannot be cast to/from other data types.
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index dec83cf4c460..c4b0fbeec5be 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -25,6 +25,182 @@ struct<>
+-- !query
+SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')
+-- !query schema
+struct<st_geomfromwkb(X'0101000000000000000000F03F0000000000000040',
0):geometry(0)>
+-- !query output
+POINT(1 2)
+
+
+-- !query
+SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040', 4326)
+-- !query schema
+struct<st_geomfromwkb(X'0101000000000000000000F03F0000000000000040',
4326):geometry(4326)>
+-- !query output
+SRID=4326;POINT(1 2)
+
+
+-- !query
+SELECT
ST_SetSrid(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'), 3857)
+-- !query schema
+struct<st_setsrid(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040',
0), 3857):geometry(3857)>
+-- !query output
+SRID=3857;POINT(1 2)
+
+
+-- !query
+SELECT ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')
+-- !query schema
+struct<st_geogfromwkb(X'0101000000000000000000F03F0000000000000040'):geography(4326)>
+-- !query output
+SRID=4326;POINT(1 2)
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS
GEOMETRY(4326))
+-- !query schema
+struct<CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS
GEOMETRY(4326)):geometry(4326)>
+-- !query output
+SRID=4326;POINT(1 2)
+
+
+-- !query
+SELECT ST_GeomFromWKB(NULL)
+-- !query schema
+struct<st_geomfromwkb(NULL, 0):geometry(0)>
+-- !query output
+NULL
+
+
+-- !query
+SELECT ST_GeomFromWKB(NULL, 4326)
+-- !query schema
+struct<st_geomfromwkb(NULL, 4326):geometry(4326)>
+-- !query output
+NULL
+
+
+-- !query
+SELECT ST_SetSrid(ST_GeomFromWKB(NULL), 3857)
+-- !query schema
+struct<st_setsrid(st_geomfromwkb(NULL, 0), 3857):geometry(3857)>
+-- !query output
+NULL
+
+
+-- !query
+SELECT ST_GeogFromWKB(NULL)
+-- !query schema
+struct<st_geogfromwkb(NULL):geography(4326)>
+-- !query output
+NULL
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(NULL) AS GEOMETRY(4326))
+-- !query schema
+struct<CAST(st_geogfromwkb(NULL) AS GEOMETRY(4326)):geometry(4326)>
+-- !query output
+NULL
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))
+-- !query schema
+struct<array(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040',
0)):array<geometry(0)>>
+-- !query output
+["POINT(1 2)"]
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
4326))
+-- !query schema
+struct<array(st_geomfromwkb(X'0101000000000000000000F03F0000000000000040',
4326)):array<geometry(4326)>>
+-- !query output
+["SRID=4326;POINT(1 2)"]
+
+
+-- !query
+SELECT array(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))
+-- !query schema
+struct<array(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040')):array<geography(4326)>>
+-- !query output
+["SRID=4326;POINT(1 2)"]
+
+
+-- !query
+SELECT ST_GeomFromWKB(wkb) FROM geodata
+-- !query schema
+struct<st_geomfromwkb(wkb, 0):geometry(0)>
+-- !query output
+NULL
+POINT(1 2)
+
+
+-- !query
+SELECT ST_GeomFromWKB(wkb, 4326) FROM geodata
+-- !query schema
+struct<st_geomfromwkb(wkb, 4326):geometry(4326)>
+-- !query output
+NULL
+SRID=4326;POINT(1 2)
+
+
+-- !query
+SELECT ST_SetSrid(ST_GeomFromWKB(wkb), 3857) FROM geodata
+-- !query schema
+struct<st_setsrid(st_geomfromwkb(wkb, 0), 3857):geometry(3857)>
+-- !query output
+NULL
+SRID=3857;POINT(1 2)
+
+
+-- !query
+SELECT ST_GeogFromWKB(wkb) FROM geodata
+-- !query schema
+struct<st_geogfromwkb(wkb):geography(4326)>
+-- !query output
+NULL
+SRID=4326;POINT(1 2)
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(wkb) AS GEOMETRY(4326)) FROM geodata
+-- !query schema
+struct<CAST(st_geogfromwkb(wkb) AS GEOMETRY(4326)):geometry(4326)>
+-- !query output
+NULL
+SRID=4326;POINT(1 2)
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(wkb)) FROM geodata
+-- !query schema
+struct<array(st_geomfromwkb(wkb, 0)):array<geometry(0)>>
+-- !query output
+["POINT(1 2)"]
+[null]
+
+
+-- !query
+SELECT array(ST_GeomFromWKB(wkb, 4326)) FROM geodata
+-- !query schema
+struct<array(st_geomfromwkb(wkb, 4326)):array<geometry(4326)>>
+-- !query output
+["SRID=4326;POINT(1 2)"]
+[null]
+
+
+-- !query
+SELECT array(ST_GeogFromWKB(wkb)) FROM geodata
+-- !query schema
+struct<array(st_geogfromwkb(wkb)):array<geography(4326)>>
+-- !query output
+["SRID=4326;POINT(1 2)"]
+[null]
+
+
-- !query
SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS
STRING) AS result
-- !query schema
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
index ae5b303116de..251ed064af91 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala
@@ -173,6 +173,51 @@ class HiveResultSuite extends SharedSparkSession {
assert(hiveResultString(plan2) === Seq("[5 00:00:00.010000000]"))
}
+ test("geometry formatting in hive result") {
+ // Geometry with no SRID.
+ val df = spark.sql(
+ "SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')")
+ val result = hiveResultString(df.queryExecution.executedPlan)
+ assert(result === Seq("POINT(1 2)"))
+ // Geometry with SRID 0.
+ val dfSrid0 = spark.sql(
+ "SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
0)")
+ val resultSrid0 = hiveResultString(dfSrid0.queryExecution.executedPlan)
+ assert(resultSrid0 === Seq("POINT(1 2)"))
+ // Geometry with SRID 4326.
+ val dfSrid4326 = spark.sql(
+ "SELECT ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040',
4326)")
+ val resultSrid4326 =
hiveResultString(dfSrid4326.queryExecution.executedPlan)
+ assert(resultSrid4326 === Seq("SRID=4326;POINT(1 2)"))
+ }
+
+ test("nested geometry formatting in hive result") {
+ // Geometry with no SRID.
+ val df = spark.sql(
+ "SELECT
array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040'))")
+ val result = hiveResultString(df.queryExecution.executedPlan)
+ assert(result === Seq("[\"POINT(1 2)\"]"))
+ // Geometry with SRID 4326.
+ val dfSrid4326 = spark.sql(
+ "SELECT
array(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040', 4326))")
+ val resultSrid4326 =
hiveResultString(dfSrid4326.queryExecution.executedPlan)
+ assert(resultSrid4326 === Seq("[\"SRID=4326;POINT(1 2)\"]"))
+ }
+
+ test("geography formatting in hive result") {
+ val df = spark.sql(
+ "SELECT ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')")
+ val result = hiveResultString(df.queryExecution.executedPlan)
+ assert(result === Seq("SRID=4326;POINT(1 2)"))
+ }
+
+ test("nested geography formatting in hive result") {
+ val df = spark.sql(
+ "SELECT
array(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))")
+ val result = hiveResultString(df.queryExecution.executedPlan)
+ assert(result === Seq("[\"SRID=4326;POINT(1 2)\"]"))
+ }
+
test("SPARK-52650: Use stringifyValue to get UDT string representation") {
val year = Year.of(18)
val tpe = new YearUDT()
diff --git
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
index 091b4a32b668..df31ca311d46 100644
---
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
+++
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala
@@ -26,7 +26,7 @@ import org.apache.hive.service.rpc.thrift._
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.HiveResult._
-import org.apache.spark.sql.types.{BinaryType, BooleanType, ByteType,
DataType, DoubleType, FloatType, IntegerType, LongType, ShortType, StringType}
+import org.apache.spark.sql.types.{BinaryType, BooleanType, ByteType,
DataType, DoubleType, FloatType, GeographyType, GeometryType, IntegerType,
LongType, ShortType, StringType}
object RowSetUtils {
@@ -142,7 +142,16 @@ object RowSetUtils {
val value = if (row.isNullAt(ordinal)) {
""
} else {
- toHiveString((row.get(ordinal), typ), nested = true,
timeFormatters, binaryFormatter)
+ // In this code path, `nested` was historically set to true for
all types by default,
+ // but ideally it should be false in general. This was never a
problem because other
+ // types that reach this branch do not use the `nested` flag in
`toHiveString`. Now,
+ // Geospatial types use it for wrapping EWKT in quotes when nested
= true, so we need
+ // to set `nested` here to false to avoid spurious quotes for
standalone geo values.
+ val nested = typ match {
+ case _: GeometryType | _: GeographyType => false
+ case _ => true
+ }
+ toHiveString((row.get(ordinal), typ), nested, timeFormatters,
binaryFormatter)
}
values.add(value)
i += 1
diff --git
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index 350aba1ab175..cd2bdefcc306 100644
---
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -335,6 +335,8 @@ object SparkExecuteStatementOperation {
case LongType => TTypeId.BIGINT_TYPE
case FloatType => TTypeId.FLOAT_TYPE
case DoubleType => TTypeId.DOUBLE_TYPE
+ case _: GeometryType => TTypeId.STRING_TYPE
+ case _: GeographyType => TTypeId.STRING_TYPE
case _: CharType => TTypeId.CHAR_TYPE
case _: VarcharType => TTypeId.VARCHAR_TYPE
case _: StringType => TTypeId.STRING_TYPE
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala
index 5abf034c1dea..ce119cc38184 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.classic.{DataFrame, SparkSession}
import org.apache.spark.sql.hive.thriftserver.ui.HiveThriftServer2EventManager
import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{IntegerType, NullType, StringType,
StructField, StructType}
+import org.apache.spark.sql.types.{GeographyType, GeometryType, IntegerType,
NullType, StringType, StructField, StructType}
class SparkExecuteStatementOperationSuite extends SparkFunSuite with
SharedSparkSession {
@@ -65,6 +65,18 @@ class SparkExecuteStatementOperationSuite extends
SparkFunSuite with SharedSpark
assert(columns.getColumns.get(1).getComment == "")
}
+ test("GeometryType and GeographyType are mapped to STRING_TYPE in
ThriftServer") {
+ val field1 = StructField("geom", GeometryType(4326))
+ val field2 = StructField("geog", GeographyType(4326))
+ val tableSchema = StructType(Seq(field1, field2))
+ val columns = SparkExecuteStatementOperation.toTTableSchema(tableSchema)
+ assert(columns.getColumnsSize == 2)
+ val geomType =
columns.getColumns.get(0).getTypeDesc.getTypes.get(0).getPrimitiveEntry.getType
+ assert(geomType === TTypeId.STRING_TYPE)
+ val geogType =
columns.getColumns.get(1).getTypeDesc.getTypes.get(0).getPrimitiveEntry.getType
+ assert(geogType === TTypeId.STRING_TYPE)
+ }
+
Seq(
(OperationState.CANCELED, (_: SparkExecuteStatementOperation).cancel()),
(OperationState.TIMEDOUT, (_:
SparkExecuteStatementOperation).timeoutCancel()),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]