Copilot commented on code in PR #848:
URL: https://github.com/apache/sedona-db/pull/848#discussion_r3251199058
##########
rust/sedona-schema/src/datatypes.rs:
##########
@@ -67,14 +67,29 @@ pub const WKB_VIEW_GEOMETRY: SedonaType =
SedonaType::WkbView(Edges::Planar, Crs
/// Sentinel for [`SedonaType::Wkb`] with spherical edges
///
/// This constant is useful when defining type signatures as these ignore the
Crs when
-/// matching (and `SedonaType::Wkb(...)` is verbose)
+/// matching (and `SedonaType::Wkb(...)` is verbose). Note that
[WKB_GEOGRAPHY_WGS84]
+/// is likely more appropriate in many cases.
pub const WKB_GEOGRAPHY: SedonaType = SedonaType::Wkb(Edges::Spherical,
Crs::None);
/// Sentinel for [`SedonaType::WkbView`] with spherical edges
///
/// See [`WKB_GEOGRAPHY`]
pub const WKB_VIEW_GEOGRAPHY: SedonaType =
SedonaType::WkbView(Edges::Spherical, Crs::None);
+/// Sentinel for [`SedonaType::Wkb`] with spherical edges and longitude,
latitude CRS
+///
+/// This constant is useful when defining type signatures as these ignore the
Crs when
+/// matching (and `SedonaType::Wkb(...)` is verbose)
+pub static WKB_GEOGRAPHY_WGS84: LazyLock<SedonaType> =
+ LazyLock::new(|| SedonaType::Wkb(Edges::Spherical, lnglat()));
+
+/// Sentinel for [`SedonaType::Wkb`] with spherical edges and longitude,
latitude CRS
+///
+/// This constant is useful when defining type signatures as these ignore the
Crs when
+/// matching (and `SedonaType::Wkb(...)` is verbose)
Review Comment:
The doc comments for `WKB_GEOGRAPHY_WGS84` and `WKB_VIEW_GEOGRAPHY_WGS84`
are identical and state the constant is useful "when defining type signatures
as these ignore the Crs when matching". However, unlike the planar/non-WGS84
sentinels (which truly are signature-only with `Crs::None`), this constant
carries a concrete `lnglat()` CRS and the new code uses it as the actual return
type of `ST_GeogFromWKT`/`ST_GeogFromWKB`/`ST_GeogPoint`. The docstring should
be updated to reflect that this constant represents a concrete output type, not
just a signature sentinel.
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -546,15 +607,92 @@ impl CoordinateReferenceSystem for ProjJSON {
fn to_crs_string(&self) -> String {
self.to_json()
}
+
+ fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+ let Value::Object(obj) = &self.value else {
+ return Ok(None);
+ };
+
+ let Some(Value::String(crs_type)) = obj.get("type") else {
+ return Ok(None);
+ };
+
+ // Only geographic CRSes have geographic params
+ if crs_type != "GeographicCRS" {
+ return Ok(None);
+ }
+
+ // Try datum first, then datum_ensemble
+ let ellipsoid = if let Some(Value::Object(datum)) = obj.get("datum") {
+ datum.get("ellipsoid")
+ } else if let Some(Value::Object(datum_ensemble)) =
obj.get("datum_ensemble") {
+ datum_ensemble.get("ellipsoid")
+ } else {
+ return exec_err!("PROJJSON GeographiCRS missing datum or
datum_ensemble ellipsoid");
Review Comment:
Typo in error message: "GeographiCRS" should be "GeographicCRS".
##########
c/sedona-proj/src/st_transform.rs:
##########
@@ -123,6 +141,11 @@ impl SedonaScalarKernel for STTransform {
let maybe_from_crs = deserialize_crs(&from_constant)?;
let maybe_to_crs = deserialize_crs(&to_constant)?;
if let (Some(from_crs), Some(to_crs)) = (maybe_from_crs,
maybe_to_crs) {
+ // Validate the target CRS is valid for the output type (e.g.,
geography
+ // requires a geographic CRS)
+ let to_crs_opt: Crs = Some(to_crs.clone());
+ validate_crs_for_type(&to_crs_opt, return_type)?;
Review Comment:
`validate_crs_for_type` returns `sedona_internal_err!` for any
non-Wkb/WkbView SedonaType. Callers in `st_transform.rs` pass `return_type`
directly (line 147), which may be an item-CRS struct type when the target CRS
is an array. In that case this validation function would error with
"Non-geometry type in CRS--type validation". This path is unreachable today
because the constant-resolution branch (lines 141-148) is only entered when
both from/to are scalar constants — but the coupling is subtle. Consider either
documenting this invariant explicitly or making `validate_crs_for_type` handle
item-CRS struct types by extracting the inner type.
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -465,6 +493,39 @@ impl CoordinateReferenceSystem for AuthorityCode {
fn to_crs_string(&self) -> String {
self.auth_code.clone()
}
+
+ /// Hard-code support for several known spherical CRSes so we can support
them
+ /// in the geography type
+ fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+ match self.auth_code.as_str() {
+ // Default lnglat(). Here we use S2Earth::RadiusMeters() as a
constant
+ // for consistent results (if we averaged over the ensemble,
distance results
+ // may be subtely different between versions as the ensemble is
updated).
+ "OGC:CRS84" | "EPSG:4326" => Ok(Some(GeographicCrsParams {
+ spherical_radius_m: 6371010.0,
+ })),
+ // NAD83
+ "OGC:CRS83" | "EPSG:4269" => {
+ const A: f64 = 6378137.0;
+ const INV_F: f64 = 298.257222101;
+ const B: f64 = A * (1.0 - 1.0 / INV_F);
+ Ok(Some(GeographicCrsParams {
+ spherical_radius_m: (2.0 * A + B) / 3.0,
+ }))
+ }
+ // NAD27
+ // Mean radius = (2a + b) / 3
+ // https://spatialreference.org/ref/epsg/4269/
Review Comment:
The reference link "https://spatialreference.org/ref/epsg/4269/" placed in
the NAD27 (EPSG:4267 / OGC:CRS27) branch points to EPSG:4269 (NAD83), which is
the previous branch. The link should reference EPSG:4267 (NAD27) since this
branch defines the Clarke 1866 ellipsoid for NAD27.
##########
rust/sedona-functions/src/st_setsrid.rs:
##########
@@ -534,6 +541,67 @@ pub fn validate_crs(
Ok(())
}
+/// Given a resolved [Crs], validate that it is appropriate for a given output
+///
+/// This is primarily to enforce that the geography type may only have a
+/// geographic CRS. The unset CRS is also allowed for now but this may be
+/// updated in the future (in general a Geography type should always have a
+/// CRS).
+pub fn validate_crs_for_type(crs: &Crs, sedona_type: &SedonaType) ->
Result<()> {
+ let edges = match sedona_type {
+ SedonaType::Wkb(edges, _) | SedonaType::WkbView(edges, _) => edges,
+ _ => {
+ return sedona_internal_err!(
+ "Non-geometry type in CRS--type validation: {sedona_type}"
+ );
+ }
+ };
+
+ // For geography, ensure the CRS is geographical if present
+ if !matches!(edges, Edges::Planar) {
+ if let Some(crs) = crs {
+ if crs.geographic_params()?.is_none() {
+ return plan_err!(
+ "Can't assign non-geographic CRS {crs} to column of type
{sedona_type}"
+ );
+ }
+ }
+ }
+
+ Ok(())
+}
+
+/// Given an array of CRSes (pre-normalized to stringview) check validity
against type
+///
+/// This check is skipped for Geometry but occurs for Geography to ensure the
+/// target types are valid.
+pub fn validate_crs_array_for_type(crs_array: &ArrayRef, sedona_type:
&SedonaType) -> Result<()> {
+ let edges = match sedona_type {
+ SedonaType::Wkb(edges, _) | SedonaType::WkbView(edges, _) => edges,
+ _ => {
+ return sedona_internal_err!(
+ "Non-geometry type in CRS--type validation: {sedona_type}"
+ );
+ }
+ };
+
+ // For geography, ensure the CRS is geographical if present
+ if !matches!(edges, Edges::Planar) {
+ let crs_array_stringview = as_string_view_array(crs_array)?;
+ for item in crs_array_stringview.iter().flatten() {
+ if let Some(crs) = deserialize_crs(item)? {
+ if crs.geographic_params()?.is_none() {
+ return exec_err!(
+ "Can't assign non-geographic CRS item {item} to column
of type {sedona_type}"
+ );
+ }
+ }
+ }
+ }
+
+ Ok(())
+}
Review Comment:
`validate_crs_array_for_type` deserializes and validates every CRS value in
the array on every invocation. For large arrays with mostly-repeating CRS
strings, this is wasteful (each row does a `deserialize_crs` +
`geographic_params` call). Consider caching results per-distinct-string,
similar to how `CachedCrsNormalization`/`CachedSRIDToCrs` are already used
elsewhere in this codebase.
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -335,6 +335,34 @@ pub trait CoordinateReferenceSystem: Debug {
/// not be escaped. This is the representation expected as input to PROJ,
GDAL,
/// and Parquet GEOMETRY/GEOGRAPHY representations of CRS.
fn to_crs_string(&self) -> String;
+
+ /// For Geographic CRSes, return the geographic parameters
+ ///
+ /// For non-geographic CRSes, this must return None. For the purposes of
this
+ /// trait, a geographic CRS must be suitable for the Geography type (i.e.,
+ /// units in degrees).
+ fn geographic_params(&self) -> Result<Option<GeographicCrsParams>>;
Review Comment:
The new `geographic_params` method is added to the public
`CoordinateReferenceSystem` trait without a default implementation. This is a
breaking change for any out-of-tree implementor of the trait. Consider adding a
default `fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
Ok(None) }` to preserve backward compatibility.
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -465,6 +493,39 @@ impl CoordinateReferenceSystem for AuthorityCode {
fn to_crs_string(&self) -> String {
self.auth_code.clone()
}
+
+ /// Hard-code support for several known spherical CRSes so we can support
them
+ /// in the geography type
+ fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+ match self.auth_code.as_str() {
+ // Default lnglat(). Here we use S2Earth::RadiusMeters() as a
constant
+ // for consistent results (if we averaged over the ensemble,
distance results
+ // may be subtely different between versions as the ensemble is
updated).
Review Comment:
Typo: "subtely" should be "subtly".
##########
rust/sedona-schema/src/crs.rs:
##########
@@ -546,15 +607,92 @@ impl CoordinateReferenceSystem for ProjJSON {
fn to_crs_string(&self) -> String {
self.to_json()
}
+
+ fn geographic_params(&self) -> Result<Option<GeographicCrsParams>> {
+ let Value::Object(obj) = &self.value else {
+ return Ok(None);
+ };
+
+ let Some(Value::String(crs_type)) = obj.get("type") else {
+ return Ok(None);
+ };
+
+ // Only geographic CRSes have geographic params
+ if crs_type != "GeographicCRS" {
+ return Ok(None);
+ }
+
+ // Try datum first, then datum_ensemble
+ let ellipsoid = if let Some(Value::Object(datum)) = obj.get("datum") {
+ datum.get("ellipsoid")
+ } else if let Some(Value::Object(datum_ensemble)) =
obj.get("datum_ensemble") {
+ datum_ensemble.get("ellipsoid")
+ } else {
+ return exec_err!("PROJJSON GeographiCRS missing datum or
datum_ensemble ellipsoid");
Review Comment:
When PROJJSON contains a `datum` field that is not an Object (e.g., a string
or null), the `if let Some(Value::Object(datum))` branch falls through to the
`else if` checking `datum_ensemble`, and if `datum_ensemble` is missing
entirely returns the error "missing datum or datum_ensemble ellipsoid". This is
misleading when the user actually supplied a malformed `datum` field. Consider
distinguishing the "malformed datum" case from the "missing both" case, or
surface the underlying issue more clearly to aid debugging.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]