Copilot commented on code in PR #848:
URL: https://github.com/apache/sedona-db/pull/848#discussion_r3251199058


##########
rust/sedona-schema/src/datatypes.rs:
##########
@@ -67,14 +67,29 @@ pub const WKB_VIEW_GEOMETRY: SedonaType = 
SedonaType::WkbView(Edges::Planar, Crs
 /// Sentinel for [`SedonaType::Wkb`] with spherical edges
 ///
 /// This constant is useful when defining type signatures as these ignore the 
Crs when
-/// matching (and `SedonaType::Wkb(...)` is verbose)
+/// matching (and `SedonaType::Wkb(...)` is verbose). Note that 
[WKB_GEOGRAPHY_WGS84]
+/// is likely more appropriate in many cases.
 pub const WKB_GEOGRAPHY: SedonaType = SedonaType::Wkb(Edges::Spherical, 
Crs::None);
 
 /// Sentinel for [`SedonaType::WkbView`] with spherical edges
 ///
 /// See [`WKB_GEOGRAPHY`]
 pub const WKB_VIEW_GEOGRAPHY: SedonaType = 
SedonaType::WkbView(Edges::Spherical, Crs::None);
 
+/// Sentinel for [`SedonaType::Wkb`] with spherical edges and longitude, 
latitude CRS
+///
+/// This constant is useful when defining type signatures as these ignore the 
Crs when
+/// matching (and `SedonaType::Wkb(...)` is verbose)
+pub static WKB_GEOGRAPHY_WGS84: LazyLock<SedonaType> =
+    LazyLock::new(|| SedonaType::Wkb(Edges::Spherical, lnglat()));
+
+/// Sentinel for [`SedonaType::Wkb`] with spherical edges and longitude, 
latitude CRS
+///
+/// This constant is useful when defining type signatures as these ignore the 
Crs when
+/// matching (and `SedonaType::Wkb(...)` is verbose)

Review Comment:
   The doc comments for `WKB_GEOGRAPHY_WGS84` and `WKB_VIEW_GEOGRAPHY_WGS84` 
are identical and state the constant is useful "when defining type signatures 
as these ignore the Crs when matching". However, unlike the planar/non-WGS84 
sentinels (which truly are signature-only with `Crs::None`), this constant 
carries a concrete `lnglat()` CRS and the new code uses it as the actual return 
type of `ST_GeogFromWKT`/`ST_GeogFromWKB`/`ST_GeogPoint`. The docstring should 
be updated to reflect that this constant represents a concrete output type, not 
just a signature sentinel.
   



##########
rust/sedona-schema/src/crs.rs:
##########
@@ -546,15 +607,92 @@ impl CoordinateReferenceSystem for ProjJSON {
     fn to_crs_string(&self) -> String {
         self.to_json()
     }
+
+    fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+        let Value::Object(obj) = &self.value else {
+            return Ok(None);
+        };
+
+        let Some(Value::String(crs_type)) = obj.get("type") else {
+            return Ok(None);
+        };
+
+        // Only geographic CRSes have geographic params
+        if crs_type != "GeographicCRS" {
+            return Ok(None);
+        }
+
+        // Try datum first, then datum_ensemble
+        let ellipsoid = if let Some(Value::Object(datum)) = obj.get("datum") {
+            datum.get("ellipsoid")
+        } else if let Some(Value::Object(datum_ensemble)) = 
obj.get("datum_ensemble") {
+            datum_ensemble.get("ellipsoid")
+        } else {
+            return exec_err!("PROJJSON GeographiCRS missing datum or 
datum_ensemble ellipsoid");

Review Comment:
   Typo in error message: "GeographiCRS" should be "GeographicCRS".
   



##########
c/sedona-proj/src/st_transform.rs:
##########
@@ -123,6 +141,11 @@ impl SedonaScalarKernel for STTransform {
             let maybe_from_crs = deserialize_crs(&from_constant)?;
             let maybe_to_crs = deserialize_crs(&to_constant)?;
             if let (Some(from_crs), Some(to_crs)) = (maybe_from_crs, 
maybe_to_crs) {
+                // Validate the target CRS is valid for the output type (e.g., 
geography
+                // requires a geographic CRS)
+                let to_crs_opt: Crs = Some(to_crs.clone());
+                validate_crs_for_type(&to_crs_opt, return_type)?;

Review Comment:
   `validate_crs_for_type` returns `sedona_internal_err!` for any 
non-Wkb/WkbView SedonaType. Callers in `st_transform.rs` pass `return_type` 
directly (line 147), which may be an item-CRS struct type when the target CRS 
is an array. In that case this validation function would error with 
"Non-geometry type in CRS--type validation". This path is unreachable today 
because the constant-resolution branch (lines 141-148) is only entered when 
both from/to are scalar constants — but the coupling is subtle. Consider either 
documenting this invariant explicitly or making `validate_crs_for_type` handle 
item-CRS struct types by extracting the inner type.
   



##########
rust/sedona-schema/src/crs.rs:
##########
@@ -465,6 +493,39 @@ impl CoordinateReferenceSystem for AuthorityCode {
     fn to_crs_string(&self) -> String {
         self.auth_code.clone()
     }
+
+    /// Hard-code support for several known spherical CRSes so we can support 
them
+    /// in the geography type
+    fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+        match self.auth_code.as_str() {
+            // Default lnglat(). Here we use S2Earth::RadiusMeters() as a 
constant
+            // for consistent results (if we averaged over the ensemble, 
distance results
+            // may be subtely different between versions as the ensemble is 
updated).
+            "OGC:CRS84" | "EPSG:4326" => Ok(Some(GeographicCrsParams {
+                spherical_radius_m: 6371010.0,
+            })),
+            // NAD83
+            "OGC:CRS83" | "EPSG:4269" => {
+                const A: f64 = 6378137.0;
+                const INV_F: f64 = 298.257222101;
+                const B: f64 = A * (1.0 - 1.0 / INV_F);
+                Ok(Some(GeographicCrsParams {
+                    spherical_radius_m: (2.0 * A + B) / 3.0,
+                }))
+            }
+            // NAD27
+            // Mean radius = (2a + b) / 3
+            // https://spatialreference.org/ref/epsg/4269/

Review Comment:
   The reference link "https://spatialreference.org/ref/epsg/4269/"; placed in 
the NAD27 (EPSG:4267 / OGC:CRS27) branch points to EPSG:4269 (NAD83), which is 
the previous branch. The link should reference EPSG:4267 (NAD27) since this 
branch defines the Clarke 1866 ellipsoid for NAD27.
   



##########
rust/sedona-functions/src/st_setsrid.rs:
##########
@@ -534,6 +541,67 @@ pub fn validate_crs(
     Ok(())
 }
 
+/// Given a resolved [Crs], validate that it is appropriate for a given output
+///
+/// This is primarily to enforce that the geography type may only have a
+/// geographic CRS. The unset CRS is also allowed for now but this may be
+/// updated in the future (in general a Geography type should always have a
+/// CRS).
+pub fn validate_crs_for_type(crs: &Crs, sedona_type: &SedonaType) -> 
Result<()> {
+    let edges = match sedona_type {
+        SedonaType::Wkb(edges, _) | SedonaType::WkbView(edges, _) => edges,
+        _ => {
+            return sedona_internal_err!(
+                "Non-geometry type in CRS--type validation: {sedona_type}"
+            );
+        }
+    };
+
+    // For geography, ensure the CRS is geographical if present
+    if !matches!(edges, Edges::Planar) {
+        if let Some(crs) = crs {
+            if crs.geographic_params()?.is_none() {
+                return plan_err!(
+                    "Can't assign non-geographic CRS {crs} to column of type 
{sedona_type}"
+                );
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Given an array of CRSes (pre-normalized to stringview) check validity 
against type
+///
+/// This check is skipped for Geometry but occurs for Geography to ensure the
+/// target types are valid.
+pub fn validate_crs_array_for_type(crs_array: &ArrayRef, sedona_type: 
&SedonaType) -> Result<()> {
+    let edges = match sedona_type {
+        SedonaType::Wkb(edges, _) | SedonaType::WkbView(edges, _) => edges,
+        _ => {
+            return sedona_internal_err!(
+                "Non-geometry type in CRS--type validation: {sedona_type}"
+            );
+        }
+    };
+
+    // For geography, ensure the CRS is geographical if present
+    if !matches!(edges, Edges::Planar) {
+        let crs_array_stringview = as_string_view_array(crs_array)?;
+        for item in crs_array_stringview.iter().flatten() {
+            if let Some(crs) = deserialize_crs(item)? {
+                if crs.geographic_params()?.is_none() {
+                    return exec_err!(
+                        "Can't assign non-geographic CRS item {item} to column 
of type {sedona_type}"
+                    );
+                }
+            }
+        }
+    }
+
+    Ok(())
+}

Review Comment:
   `validate_crs_array_for_type` deserializes and validates every CRS value in 
the array on every invocation. For large arrays with mostly-repeating CRS 
strings, this is wasteful (each row does a `deserialize_crs` + 
`geographic_params` call). Consider caching results per-distinct-string, 
similar to how `CachedCrsNormalization`/`CachedSRIDToCrs` are already used 
elsewhere in this codebase.



##########
rust/sedona-schema/src/crs.rs:
##########
@@ -335,6 +335,34 @@ pub trait CoordinateReferenceSystem: Debug {
     /// not be escaped. This is the representation expected as input to PROJ, 
GDAL,
     /// and Parquet GEOMETRY/GEOGRAPHY representations of CRS.
     fn to_crs_string(&self) -> String;
+
+    /// For Geographic CRSes, return the geographic parameters
+    ///
+    /// For non-geographic CRSes, this must return None. For the purposes of 
this
+    /// trait, a geographic CRS must be suitable for the Geography type (i.e.,
+    /// units in degrees).
+    fn geographic_params(&self) -> Result<Option<GeographicCrsParams>>;

Review Comment:
   The new `geographic_params` method is added to the public 
`CoordinateReferenceSystem` trait without a default implementation. This is a 
breaking change for any out-of-tree implementor of the trait. Consider adding a 
default `fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> { 
Ok(None) }` to preserve backward compatibility.
   



##########
rust/sedona-schema/src/crs.rs:
##########
@@ -465,6 +493,39 @@ impl CoordinateReferenceSystem for AuthorityCode {
     fn to_crs_string(&self) -> String {
         self.auth_code.clone()
     }
+
+    /// Hard-code support for several known spherical CRSes so we can support 
them
+    /// in the geography type
+    fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+        match self.auth_code.as_str() {
+            // Default lnglat(). Here we use S2Earth::RadiusMeters() as a 
constant
+            // for consistent results (if we averaged over the ensemble, 
distance results
+            // may be subtely different between versions as the ensemble is 
updated).

Review Comment:
   Typo: "subtely" should be "subtly".
   



##########
rust/sedona-schema/src/crs.rs:
##########
@@ -546,15 +607,92 @@ impl CoordinateReferenceSystem for ProjJSON {
     fn to_crs_string(&self) -> String {
         self.to_json()
     }
+
+    fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+        let Value::Object(obj) = &self.value else {
+            return Ok(None);
+        };
+
+        let Some(Value::String(crs_type)) = obj.get("type") else {
+            return Ok(None);
+        };
+
+        // Only geographic CRSes have geographic params
+        if crs_type != "GeographicCRS" {
+            return Ok(None);
+        }
+
+        // Try datum first, then datum_ensemble
+        let ellipsoid = if let Some(Value::Object(datum)) = obj.get("datum") {
+            datum.get("ellipsoid")
+        } else if let Some(Value::Object(datum_ensemble)) = 
obj.get("datum_ensemble") {
+            datum_ensemble.get("ellipsoid")
+        } else {
+            return exec_err!("PROJJSON GeographiCRS missing datum or 
datum_ensemble ellipsoid");

Review Comment:
   When PROJJSON contains a `datum` field that is not an Object (e.g., a string 
or null), the `if let Some(Value::Object(datum))` branch falls through to the 
`else if` checking `datum_ensemble`, and if `datum_ensemble` is missing 
entirely returns the error "missing datum or datum_ensemble ellipsoid". This is 
misleading when the user actually supplied a malformed `datum` field. Consider 
distinguishing the "malformed datum" case from the "missing both" case, or 
surface the underlying issue more clearly to aid debugging.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to