Copilot commented on code in PR #2862: URL: https://github.com/apache/sedona/pull/2862#discussion_r3144264691
########## spark/common/src/test/scala/org/apache/sedona/sql/UDF/CatalogCategorizationTest.scala: ########## @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.sql.UDF + +import org.scalatest.funspec.AnyFunSpec + +/** + * Asserts the categorization invariant for `Catalog.expressions`: every registered function + * appears in exactly one of the named category sequences in `Catalog.categorizedSequences`. + * + * If you add a function to `Catalog.expressions` (typically by adding it to one of the named + * sequences) and this test fails, it means a function ended up in zero or two+ sequences. Find + * the missing/duplicate function and place it in the right docs category — the names of the + * sequences mirror the categories at + * https://sedona.apache.org/latest/api/sql/Geometry-Functions/. 
+ */ +class CatalogCategorizationTest extends AnyFunSpec { + + describe("Catalog categorization invariant") { + + it("every registered expression appears in exactly one named sequence") { + val flattened = Catalog.categorizedSequences.flatten.map(_._1.funcName) + val registered = Catalog.expressions.map(_._1.funcName) + + val flattenedSet = flattened.toSet + val registeredSet = registered.toSet + + val missing = registeredSet -- flattenedSet + val extra = flattenedSet -- registeredSet + val duplicates = flattened.diff(flattened.distinct).distinct + + assert( + missing.isEmpty, + s"Functions registered in Catalog.expressions but missing from any named sequence: " + + missing.toSeq.sorted.mkString(", ")) + assert( + extra.isEmpty, + s"Functions in named sequences but not registered in Catalog.expressions: " + + extra.toSeq.sorted.mkString(", ")) + assert( + duplicates.isEmpty, + s"Functions appearing in more than one named sequence: " + + duplicates.sorted.mkString(", ")) + } + + it("Catalog.expressions order matches the flattened sequence order") { + // Registration order is part of the public contract (used by registerAll). Refactoring + // categorization should preserve it. + val flattened = Catalog.categorizedSequences.flatten.map(_._1.funcName) + val registered = Catalog.expressions.map(_._1.funcName) + assert(registered == flattened, "registration order drifted from categorized order") + } Review Comment: This test currently compares `Catalog.categorizedSequences.flatten` against `Catalog.expressions`, but `Catalog.expressions` is defined as `categorizedSequences.flatten` in the production code. That makes the missing/extra checks and the order check effectively tautological and unable to catch functions accidentally dropped from the registry during refactors (it only meaningfully checks for duplicate function names). 
To make this test useful, compare against an independent source of truth (e.g., a checked-in snapshot/expected list or expected count, or keep `expressions` as the canonical list and assert it equals the flattened categorized sequences). ```suggestion * This test intentionally compares both production registries against a checked-in snapshot * rather than comparing them to each other, because `Catalog.expressions` is derived from * `Catalog.categorizedSequences.flatten` in production code. * * If you add, remove, or rename a function, update the snapshot below to match the intended * public registry order. The names of the sequences mirror the categories at * https://sedona.apache.org/latest/api/sql/Geometry-Functions/. */ class CatalogCategorizationTest extends AnyFunSpec { private val expectedFunctionNamesInRegistrationOrder: Seq[String] = Seq( // Keep this snapshot in the same order as Catalog.registerAll registers functions. // Update this list intentionally when the public function registry changes. 
) private def duplicateNames(names: Seq[String]): Seq[String] = names.diff(names.distinct).distinct.sorted describe("Catalog categorization invariant") { it("Catalog.expressions matches the checked-in function registry snapshot") { val registered = Catalog.expressions.map(_._1.funcName) assert( registered == expectedFunctionNamesInRegistrationOrder, "Catalog.expressions drifted from the checked-in function registry snapshot") } it("named category sequences flatten to the checked-in function registry snapshot") { val flattened = Catalog.categorizedSequences.flatten.map(_._1.funcName) val duplicates = duplicateNames(flattened) assert( duplicates.isEmpty, s"Functions appearing in more than one named sequence: ${duplicates.mkString(", ")}") assert( flattened == expectedFunctionNamesInRegistrationOrder, "Flattened categorized sequences drifted from the checked-in function registry snapshot") } ``` ########## spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala: ########## @@ -44,354 +53,478 @@ object Catalog extends AbstractCatalog with Logging { function[ST_GeomFromText](0), function[ST_GeometryFromText](0), function[ST_LineFromText](), - function[ST_GeogFromWKT](0), - function[ST_GeogFromText](0), - function[ST_GeogFromWKB](0), - function[ST_GeogFromEWKB](0), - function[ST_GeogFromEWKT](), function[ST_GeomFromWKT](0), function[ST_GeomFromEWKT](), function[ST_GeomFromWKB](), function[ST_GeomFromEWKB](), function[ST_GeomFromGeoJSON](), function[ST_GeomFromGML](), function[ST_GeomFromKML](), - function[ST_CoordDim](), - function[ST_Perimeter](), - function[ST_Perimeter2D](), function[ST_Point](), - function[ST_Points](), function[ST_MakeEnvelope](), function[ST_MakePoint](null, null), function[ST_MakePointM](), function[ST_PointZ](0), function[ST_PointM](0), function[ST_PointZM](0), function[ST_PolygonFromEnvelope](), - function[ST_Project](), - function[ST_Contains](), - function[ST_Intersects](), - function[ST_Within](), - function[ST_KNN](), - 
function[ST_Covers](), - function[ST_CoveredBy](), - function[ST_Dimension](), - function[ST_Disjoint](), - function[ST_Distance](), - function[ST_3DDistance](), - function[ST_ConcaveHull](false), - function[ST_ConvexHull](), - function[ST_NPoints](), - function[ST_NDims](), - function[ST_Buffer](), - function[ST_BestSRID](), - function[ST_ShiftLongitude](), - function[ST_Envelope](), - function[ST_Expand](), - function[ST_Length](), - function[ST_Length2D](), - function[ST_Area](), - function[ST_Centroid](), - function[ST_Transform](true), - function[ST_Intersection](), - function[ST_Difference](), - function[ST_SymDifference](), - function[ST_UnaryUnion](), - function[ST_Union](), - function[ST_IsValidDetail](), - function[ST_IsValidTrajectory](), - function[ST_IsValid](), - function[ST_IsEmpty](), - function[ST_ReducePrecision](), - function[ST_Equals](), - function[ST_Touches](), - function[ST_Relate](), - function[ST_RelateMatch](), - function[ST_Overlaps](), - function[ST_Crosses](), - function[ST_CrossesDateLine](), - function[ST_IsSimple](), - function[ST_MakeValid](false), - function[ST_SimplifyPreserveTopology](), - function[ST_AsText](), - function[ST_AsGeoJSON](), - function[ST_AsBinary](), - function[ST_AsEWKB](), - function[ST_AsHEXEWKB](), - function[ST_AsGML](), - function[ST_AsKML](), - function[ST_Simplify](), - function[ST_SimplifyVW](), - function[ST_SimplifyPolygonHull](), - function[ST_SRID](), - function[ST_SetSRID](), - function[ST_GeometryType](), - function[ST_NumGeometries](), - function[ST_LineMerge](), - function[ST_Azimuth](), - function[ST_X](), - function[ST_Y](), - function[ST_Z](), - function[ST_Zmflag](), - function[ST_StartPoint](), - function[ST_Snap](), - function[ST_ClosestPoint](), - function[ST_ShortestLine](), - function[ST_OffsetCurve](), + function[ST_GeomFromGeoHash](null), + function[ST_PointFromGeoHash](null), + function[ST_GeomFromMySQL](), + function[ST_MPointFromText](0), + function[ST_MPolyFromText](0), + 
function[ST_MLineFromText](0), + function[ST_GeomCollFromText](0)) + + // Geometry-Accessors + val geometryAccessorExprs: Seq[FunctionDescription] = Seq( + function[GeometryType](), function[ST_Boundary](), - function[ST_HasZ](), - function[ST_HasM](), - function[ST_M](), - function[ST_MMin](), - function[ST_MMax](), - function[ST_MinimumClearance](), - function[ST_MinimumClearanceLine](), - function[ST_MinimumBoundingRadius](), - function[ST_MinimumBoundingCircle](BufferParameters.DEFAULT_QUADRANT_SEGMENTS * 6), + function[ST_CoordDim](), + function[ST_CrossesDateLine](), + function[ST_Dimension](), + function[ST_Dump](), + function[ST_DumpPoints](), function[ST_EndPoint](), function[ST_ExteriorRing](), function[ST_GeometryN](), - function[ST_H3CellDistance](), - function[ST_H3CellIDs](), - function[ST_H3ToGeom](), - function[ST_H3KRing](), - function[ST_BingTile](), - function[ST_BingTileAt](), - function[ST_BingTilesAround](), - function[ST_BingTileZoomLevel](), - function[ST_BingTileX](), - function[ST_BingTileY](), - function[ST_BingTilePolygon](), - function[ST_BingTileCellIDs](), - function[ST_BingTileToGeom](), + function[ST_GeometryType](), + function[ST_HasM](), + function[ST_HasZ](), function[ST_InteriorRingN](), - function[ST_InterpolatePoint](), - function[ST_Dump](), - function[ST_DumpPoints](), function[ST_IsClosed](), function[ST_IsCollection](), - function[ST_NumInteriorRings](), - function[ST_NumInteriorRing](), - function[ST_AddMeasure](), - function[ST_AddPoint](-1), - function[ST_RemovePoint](-1), - function[ST_RemoveRepeatedPoints](), - function[ST_SetPoint](), + function[ST_IsEmpty](), + function[ST_IsPolygonCCW](), function[ST_IsPolygonCW](), function[ST_IsRing](), - function[ST_IsPolygonCCW](), - function[ST_ForcePolygonCCW](), + function[ST_IsSimple](), + function[ST_M](), + function[ST_NDims](), + function[ST_NPoints](), + function[ST_NRings](), + function[ST_NumGeometries](), + function[ST_NumInteriorRing](), + 
function[ST_NumInteriorRings](), + function[ST_NumPoints](), + function[ST_PointN](), + function[ST_Points](), + function[ST_StartPoint](), + function[ST_X](), + function[ST_Y](), + function[ST_Z](), + function[ST_Zmflag]()) + + // Geometry-Editors + val geometryEditorExprs: Seq[FunctionDescription] = Seq( + function[ST_AddPoint](-1), + function[ST_Collect](), + function[ST_CollectionExtract](defaultArgs = null), function[ST_FlipCoordinates](), + function[ST_Force2D](), + function[ST_Force3D](0.0), + function[ST_Force3DM](0.0), + function[ST_Force3DZ](0.0), + function[ST_Force4D](), + function[ST_Force_2D](), + function[ST_ForceCollection](), + function[ST_ForcePolygonCCW](), + function[ST_ForcePolygonCW](), + function[ST_ForceRHR](), + function[ST_LineFromMultiPoint](), + function[ST_LineMerge](), function[ST_LineSegments](), - function[ST_LineSubstring](), - function[ST_LineInterpolatePoint](), - function[ST_LineLocatePoint](), - function[ST_LocateAlong](), - function[ST_LongestLine](), - function[ST_SubDivideExplode](), - function[ST_SubDivide](), - function[ST_Segmentize](), function[ST_MakeLine](), - function[ST_Polygon](), - function[ST_Polygonize](), function[ST_MakePolygon](null), - function[ST_MaximumInscribedCircle](), - function[ST_MaxDistance](), - function[ST_GeoHash](), - function[ST_GeoHashNeighbors](), - function[ST_GeoHashNeighbor](), - function[ST_GeomFromGeoHash](null), - function[ST_PointFromGeoHash](null), - function[ST_GeogFromGeoHash](null), - function[ST_Collect](), function[ST_Multi](), - function[ST_PointOnSurface](), + function[ST_Normalize](), + function[ST_Polygon](), + function[ST_Project](), + function[ST_RemovePoint](-1), + function[ST_RemoveRepeatedPoints](), function[ST_Reverse](), - function[ST_PointN](), + function[ST_Segmentize](), + function[ST_SetPoint](), + function[ST_ShiftLongitude]()) + + // Geometry-Output + val geometryOutputExprs: Seq[FunctionDescription] = Seq( + function[ST_AsBinary](), + function[ST_AsEWKB](), 
function[ST_AsEWKT](), - function[ST_Force_2D](), - function[ST_Force2D](), - function[ST_ForcePolygonCW](), - function[ST_ForceRHR](), - function[ST_ZMax](), - function[ST_ZMin](), - function[ST_YMax](), - function[ST_YMin](), - function[ST_XMax](), - function[ST_XMin](), - function[ST_BuildArea](), + function[ST_AsGeoJSON](), + function[ST_AsGML](), + function[ST_AsHEXEWKB](), + function[ST_AsKML](), + function[ST_AsText](), + function[ST_GeoHash]()) + + // Predicates + val predicateExprs: Seq[FunctionDescription] = Seq( + function[ST_Contains](), + function[ST_CoveredBy](), + function[ST_Covers](), + function[ST_Crosses](), + function[ST_Disjoint](), + function[ST_DWithin](), + function[ST_Equals](), + function[ST_Intersects](), function[ST_OrderingEquals](), - function[ST_OrientedEnvelope](), - function[ST_CollectionExtract](defaultArgs = null), - function[ST_Normalize](), - function[ST_LineFromMultiPoint](), - function[ST_MPointFromText](0), - function[ST_MPolyFromText](0), - function[ST_MLineFromText](0), - function[ST_GeomCollFromText](0), - function[ST_GeogCollFromText](0), - function[ST_GeomFromMySQL](), - function[ST_Split](), - function[ST_S2CellIDs](), - function[ST_S2ToGeom](), - function[ST_GeometricMedian](1e-6, 1000, false), + function[ST_Overlaps](), + function[ST_Relate](), + function[ST_RelateMatch](), + function[ST_Touches](), + function[ST_Within]()) + + // Measurement-Functions + val measurementExprs: Seq[FunctionDescription] = Seq( + function[ST_3DDistance](), + function[ST_Angle](), + function[ST_Area](), + function[ST_AreaSpheroid](), + function[ST_Azimuth](), + function[ST_ClosestPoint](), + function[ST_Degrees](), + function[ST_Distance](), function[ST_DistanceSphere](), function[ST_DistanceSpheroid](), - function[ST_AreaSpheroid](), + function[ST_FrechetDistance](), + function[ST_HausdorffDistance](-1), + function[ST_Length](), + function[ST_Length2D](), function[ST_LengthSpheroid](), - function[ST_NumPoints](), - 
function[ST_Force3D](0.0), - function[ST_Force3DM](0.0), - function[ST_Force3DZ](0.0), - function[ST_Force4D](), - function[ST_ForceCollection](), + function[ST_LongestLine](), + function[ST_MaxDistance](), + function[ST_MinimumClearance](), + function[ST_MinimumClearanceLine](), + function[ST_Perimeter](), + function[ST_Perimeter2D](), + function[ST_ShortestLine]()) + + // Geometry-Processing + val geometryProcessingExprs: Seq[FunctionDescription] = Seq( + function[ST_ApproximateMedialAxis](), + function[ST_Buffer](), + function[ST_BuildArea](), + function[ST_Centroid](), + function[ST_ConcaveHull](false), + function[ST_ConvexHull](), + function[ST_DelaunayTriangles](), function[ST_GeneratePoints](), - function[ST_NRings](), - function[ST_Translate](0.0), + function[ST_GeometricMedian](1e-6, 1000, false), + function[ST_LabelPoint](), + function[ST_MaximumInscribedCircle](), + function[ST_MinimumBoundingCircle](BufferParameters.DEFAULT_QUADRANT_SEGMENTS * 6), + function[ST_MinimumBoundingRadius](), + function[ST_OffsetCurve](), + function[ST_OrientedEnvelope](), + function[ST_PointOnSurface](), + function[ST_Polygonize](), + function[ST_ReducePrecision](), + function[ST_Simplify](), + function[ST_SimplifyPolygonHull](), + function[ST_SimplifyPreserveTopology](), + function[ST_SimplifyVW](), + function[ST_Snap](), + function[ST_StraightSkeleton](), function[ST_TriangulatePolygon](), - function[ST_VoronoiPolygons](0.0, null), - function[ST_FrechetDistance](), + function[ST_VoronoiPolygons](0.0, null)) + + // Overlay-Functions + val overlayExprs: Seq[FunctionDescription] = Seq( + function[ST_Difference](), + function[ST_Intersection](), + function[ST_Split](), + function[ST_SubDivide](), + function[ST_SubDivideExplode](), + function[ST_SymDifference](), + function[ST_UnaryUnion](), + function[ST_Union]()) + + // Affine-Transformations + val affineTransformationExprs: Seq[FunctionDescription] = Seq( function[ST_Affine](), - function[ST_BoundingDiagonal](), - 
function[ST_Angle](), - function[ST_Degrees](), - function[ST_DelaunayTriangles](), - function[ST_HausdorffDistance](-1), - function[ST_DWithin](), - function[ST_IsValidReason](), - function[ST_Scale](), - function[ST_ScaleGeom](), function[ST_Rotate](), function[ST_RotateX](), function[ST_RotateY](), - function[ST_StraightSkeleton](), - function[ST_ApproximateMedialAxis](), - function[Barrier](), - // Expression for rasters - function[RS_NormalizedDifference](), - function[RS_Mean](), - function[RS_Mode](), - function[RS_FetchRegion](), - function[RS_GreaterThan](), - function[RS_GreaterThanEqual](), - function[RS_LessThan](), - function[RS_LessThanEqual](), - function[RS_Add](), - function[RS_Subtract](), - function[RS_Divide](), - function[RS_MultiplyFactor](), - function[RS_Multiply](), - function[RS_BitwiseAnd](), - function[RS_BitwiseOr](), - function[RS_CountValue](), - function[RS_Modulo](), - function[RS_SquareRoot](), - function[RS_LogicalDifference](), - function[RS_LogicalOver](), - function[RS_Array](), - function[RS_Normalize](), - function[RS_NormalizeAll](), - function[RS_AddBandFromArray](), - function[RS_BandAsArray](), - function[RS_MapAlgebra](null), + function[ST_Scale](), + function[ST_ScaleGeom](), + function[ST_Translate](0.0)) + + // Linear-Referencing + val linearReferencingExprs: Seq[FunctionDescription] = Seq( + function[ST_AddMeasure](), + function[ST_InterpolatePoint](), + function[ST_IsValidTrajectory](), + function[ST_LineInterpolatePoint](), + function[ST_LineLocatePoint](), + function[ST_LineSubstring](), + function[ST_LocateAlong]()) + + // Spatial-Reference-System + val spatialReferenceSystemExprs: Seq[FunctionDescription] = Seq( + function[ST_BestSRID](), + function[ST_SetSRID](), + function[ST_SRID](), + function[ST_Transform](true)) + + // Geometry-Validation + val geometryValidationExprs: Seq[FunctionDescription] = Seq( + function[ST_IsValid](), + function[ST_IsValidDetail](), + function[ST_IsValidReason](), + 
function[ST_MakeValid](false)) + + // Bounding-Box-Functions + val boundingBoxExprs: Seq[FunctionDescription] = Seq( + function[ST_BoundingDiagonal](), + function[ST_Envelope](), + function[ST_Expand](), + function[ST_MMax](), + function[ST_MMin](), + function[ST_XMax](), + function[ST_XMin](), + function[ST_YMax](), + function[ST_YMin](), + function[ST_ZMax](), + function[ST_ZMin]()) + + // Spatial-Indexing — also receives ST_KNN, which has its own NearestNeighbourSearching.md + // page but isn't listed under any of the 18 docs categories. + val spatialIndexingExprs: Seq[FunctionDescription] = Seq( + function[ST_BingTile](), + function[ST_BingTileAt](), + function[ST_BingTileCellIDs](), + function[ST_BingTilePolygon](), + function[ST_BingTilesAround](), + function[ST_BingTileToGeom](), + function[ST_BingTileX](), + function[ST_BingTileY](), + function[ST_BingTileZoomLevel](), + function[ST_GeoHashNeighbor](), + function[ST_GeoHashNeighbors](), + function[ST_H3CellDistance](), + function[ST_H3CellIDs](), + function[ST_H3KRing](), + function[ST_H3ToGeom](), + function[ST_S2CellIDs](), + function[ST_S2ToGeom](), + function[ST_KNN]()) + + // Address-Functions + val addressExprs: Seq[FunctionDescription] = + Seq(function[ExpandAddress](), function[ParseAddress]()) + + // Other / utility expressions not in any docs category + val otherExprs: Seq[FunctionDescription] = Seq(function[Barrier]()) + + // Geography (ST_Geog*) — see docs/api/sql/geography/Geography-Functions + val geographyExprs: Seq[FunctionDescription] = Seq( + function[ST_GeogFromWKT](0), + function[ST_GeogFromText](0), + function[ST_GeogFromWKB](0), + function[ST_GeogFromEWKB](0), + function[ST_GeogFromEWKT](), + function[ST_GeogCollFromText](0), + function[ST_GeogFromGeoHash](null), + function[ST_GeogToGeometry](), + function[ST_GeomToGeography]()) + + // =========================================================================== + // Raster (RS_) functions — categories from the raster docs pages + // 
=========================================================================== + + // Raster-Constructors + val rasterConstructorExprs: Seq[FunctionDescription] = Seq( function[RS_FromArcInfoAsciiGrid](), function[RS_FromGeoTiff](), + function[RS_FromNetCDF](), function[RS_MakeEmptyRaster](), function[RS_MakeRaster](), function[RS_MakeRasterForTesting](), - function[RS_Tile](), - function[RS_TileExplode](), - function[RS_Envelope](), - function[RS_NumBands](), + function[RS_NetCDFInfo]()) + + // Raster-Accessors (spatial properties: dimensions, scale, skew, world coords) + val rasterAccessorExprs: Seq[FunctionDescription] = Seq( + function[RS_GeoReference](), + function[RS_GeoTransform](), + function[RS_Height](), + function[RS_RasterToWorldCoord](), + function[RS_RasterToWorldCoordX](), + function[RS_RasterToWorldCoordY](), + function[RS_Rotation](), + function[RS_ScaleX](), + function[RS_ScaleY](), + function[RS_SkewX](), + function[RS_SkewY](), + function[RS_UpperLeftX](), + function[RS_UpperLeftY](), + function[RS_Width](), + function[RS_WorldToRasterCoord](), + function[RS_WorldToRasterCoordX](), + function[RS_WorldToRasterCoordY]()) + + // Raster-Band-Accessors (band-level properties and statistics) + val rasterBandAccessorExprs: Seq[FunctionDescription] = Seq( + function[RS_Band](), + function[RS_BandAsArray](), + function[RS_BandIsNoData](), + function[RS_BandNoDataValue](), + function[RS_BandPixelType](), + function[RS_Count](), + function[RS_SummaryStats](), + function[RS_SummaryStatsAll](), + function[RS_ZonalStats](), + function[RS_ZonalStatsAll]()) + + // Raster-Operators (configuration, transformation, manipulation) + val rasterOperatorExprs: Seq[FunctionDescription] = Seq( + function[RS_AddBand](), + function[RS_AddBandFromArray](), + function[RS_AsRaster](), + function[RS_Clip](), + function[RS_CRS](), + function[RS_Interpolate](), function[RS_Metadata](), - function[RS_SetSRID](), + function[RS_NormalizeAll](), + function[RS_NumBands](), + 
function[RS_ReprojectMatch]("nearestneighbor"), + function[RS_Resample](), + function[RS_SetBandNoDataValue](), function[RS_SetCRS](), function[RS_SetGeoReference](), - function[RS_SetBandNoDataValue](), function[RS_SetPixelType](), - function[RS_SetValues](), + function[RS_SetSRID](), function[RS_SetValue](), + function[RS_SetValues](), function[RS_SRID](), - function[RS_CRS](), + function[RS_Union](), function[RS_Value](1), - function[RS_Values](1), - function[RS_Intersects](), - function[RS_Interpolate](), - function[RS_AsGeoTiff](), - function[RS_AsCOG](), - function[RS_AsRaster](), + function[RS_Values](1)) + + // Raster-Output + val rasterOutputExprs: Seq[FunctionDescription] = Seq( function[RS_AsArcGrid](), function[RS_AsBase64](), - function[RS_AsPNG](), - function[RS_Width](), - function[RS_Height](), - function[RS_Union](), - function[RS_UpperLeftX](), - function[RS_UpperLeftY](), - function[RS_ScaleX](), - function[RS_ScaleY](), - function[RS_SkewX](), - function[RS_SkewY](), - function[RS_GeoReference](), - function[RS_Rotation](), - function[RS_GeoTransform](), + function[RS_AsCOG](), + function[RS_AsGeoTiff](), + function[RS_AsImage](), + function[RS_AsMatrix](), + function[RS_AsPNG]()) + + // Raster-Predicates + val rasterPredicateExprs: Seq[FunctionDescription] = + Seq(function[RS_Contains](), function[RS_Intersects](), function[RS_Within]()) + + // Raster-Geometry-Functions (raster → geometry derivations) + val rasterGeometryExprs: Seq[FunctionDescription] = + Seq(function[RS_ConvexHull](), function[RS_Envelope](), function[RS_MinConvexHull]()) + + // Pixel-Functions + val pixelExprs: Seq[FunctionDescription] = Seq( + function[RS_PixelAsCentroid](), + function[RS_PixelAsCentroids](), function[RS_PixelAsPoint](), function[RS_PixelAsPoints](), function[RS_PixelAsPolygon](), - function[RS_PixelAsPolygons](), - function[RS_PixelAsCentroid](), - function[RS_PixelAsCentroids](), - function[RS_Count](), - function[RS_Clip](), - function[RS_Band](), - 
function[RS_AddBand](), - function[RS_SummaryStatsAll](), - function[RS_SummaryStats](), - function[RS_BandIsNoData](), - function[RS_ConvexHull](), - function[RS_RasterToWorldCoordX](), - function[RS_RasterToWorldCoordY](), - function[RS_RasterToWorldCoord](), - function[RS_Within](), - function[RS_Contains](), - function[RS_WorldToRasterCoord](), - function[RS_WorldToRasterCoordX](), - function[RS_WorldToRasterCoordY](), - function[RS_BandNoDataValue](), - function[RS_BandPixelType](), - function[RS_MinConvexHull](), - function[RS_AsMatrix](), - function[RS_AsImage](), - function[RS_ZonalStats](), - function[RS_ZonalStatsAll](), - function[RS_Resample](), - function[RS_ReprojectMatch]("nearestneighbor"), - function[RS_FromNetCDF](), - function[RS_NetCDFInfo](), - // geom <-> geog conversion functions - function[ST_GeogToGeometry](), - function[ST_GeomToGeography]()) ++ geoStatsFunctions() + function[RS_PixelAsPolygons]()) - val aggregateExpressions: Seq[Aggregator[Geometry, _, _]] = - Seq(new ST_Envelope_Aggr, new ST_Intersection_Aggr, new ST_Union_Aggr(), new ST_Collect_Agg()) + // Map-Algebra-Operators + val mapAlgebraExprs: Seq[FunctionDescription] = Seq( + function[RS_Add](), + function[RS_Array](), + function[RS_BitwiseAnd](), + function[RS_BitwiseOr](), + function[RS_CountValue](), + function[RS_Divide](), + function[RS_FetchRegion](), + function[RS_GreaterThan](), + function[RS_GreaterThanEqual](), + function[RS_LessThan](), + function[RS_LessThanEqual](), + function[RS_LogicalDifference](), + function[RS_LogicalOver](), + function[RS_MapAlgebra](null), + function[RS_Mean](), + function[RS_Mode](), + function[RS_Modulo](), + function[RS_Multiply](), + function[RS_MultiplyFactor](), + function[RS_Normalize](), + function[RS_NormalizedDifference](), + function[RS_SquareRoot](), + function[RS_Subtract]()) - private def geoStatsFunctions(): Seq[FunctionDescription] = { - // Try loading geostats functions. Return a seq of geo-stats functions. 
If any error occurs, - // return an empty seq to skip registering these functions. - // This is for fixing a compatibility issue with DBR 17.3 LTS. See https://github.com/apache/sedona/issues/2472 + // Raster-Tiles + val rasterTileExprs: Seq[FunctionDescription] = + Seq(function[RS_Tile](), function[RS_TileExplode]()) + + // =========================================================================== + // dbx-incompatible functions, split by docs category. May fail to load on + // unsupported DBR versions — see https://github.com/apache/sedona/issues/2472. + // =========================================================================== + private val dbxIncompatibleGroups: (Seq[FunctionDescription], Seq[FunctionDescription]) = { try { - Seq( - function[ST_DBSCAN](), - function[ST_LocalOutlierFactor](), + // Clustering-Functions docs page + val clustering = Seq(function[ST_DBSCAN](), function[ST_LocalOutlierFactor]()) + // Spatial-Statistics docs page + val spatialStatistics = Seq( function[ST_GLocal](), function[ST_BinaryDistanceBandColumn](), function[ST_WeightedDistanceBandColumn]()) + (clustering, spatialStatistics) } catch { case e: Throwable => log.warn( - "GEO stats functions are not available due to Spark/DBR compatibility issues.", + "clustering and spatial-statistics functions are not available due to Spark/DBR compatibility issues.", e) - Seq.empty + (Seq.empty, Seq.empty) } } + val clusteringExprs: Seq[FunctionDescription] = dbxIncompatibleGroups._1 + val spatialStatisticsExprs: Seq[FunctionDescription] = dbxIncompatibleGroups._2 + + // =========================================================================== + // All named sequences in registration order. The categorization invariant + // (every entry in `expressions` lives in exactly one of these sequences) is + // checked by `CatalogCategorizationTest`. 
+ // =========================================================================== + val categorizedSequences: Seq[Seq[FunctionDescription]] = Seq( + geometryConstructorExprs, Review Comment: The PR description and the comments here say that registration order is preserved and that `categorizedSequences` is in "registration order", but this refactor changes the ordering relative to the previous flat `expressions` list (e.g., the old list began with `ExpandAddress`/`ParseAddress`/`GeometryType`/`ST_LabelPoint`, while the new `categorizedSequences.flatten` begins with geometry constructors). If order is intended to be stable, reorder `categorizedSequences` to match the historical `expressions` order; otherwise, please update the PR description and remove/adjust the order-as-contract assertions/comments. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
