paleolimbot commented on code in PR #749:
URL: https://github.com/apache/sedona-db/pull/749#discussion_r3235261182
##########
rust/sedona-raster/src/traits.rs:
##########
@@ -73,62 +108,550 @@ pub trait MetadataRef {
/// Y-direction skew/rotation
fn skew_y(&self) -> f64;
}
-/// Trait for accessing all bands in a raster
-pub trait BandsRef {
- /// Number of bands in the raster
- fn len(&self) -> usize;
- /// Check if no bands are present
- fn is_empty(&self) -> bool {
+
+impl MetadataRef for RasterMetadata {
+ fn width(&self) -> u64 {
+ self.width
+ }
+ fn height(&self) -> u64 {
+ self.height
+ }
+ fn upper_left_x(&self) -> f64 {
+ self.upperleft_x
+ }
+ fn upper_left_y(&self) -> f64 {
+ self.upperleft_y
+ }
+ fn scale_x(&self) -> f64 {
+ self.scale_x
+ }
+ fn scale_y(&self) -> f64 {
+ self.scale_y
+ }
+ fn skew_x(&self) -> f64 {
+ self.skew_x
+ }
+ fn skew_y(&self) -> f64 {
+ self.skew_y
+ }
+}
+
+impl RasterMetadata {
+ pub fn width(&self) -> u64 {
+ self.width
+ }
+ pub fn height(&self) -> u64 {
+ self.height
+ }
+ pub fn upper_left_x(&self) -> f64 {
+ self.upperleft_x
+ }
+ pub fn upper_left_y(&self) -> f64 {
+ self.upperleft_y
+ }
+ pub fn scale_x(&self) -> f64 {
+ self.scale_x
+ }
+ pub fn scale_y(&self) -> f64 {
+ self.scale_y
+ }
+ pub fn skew_x(&self) -> f64 {
+ self.skew_x
+ }
+ pub fn skew_y(&self) -> f64 {
+ self.skew_y
+ }
+}
+
+/// Concrete band metadata returned by `BandRef::metadata()`.
+///
+/// Restored from the pre-N-D schema. The `outdb_url` and `outdb_band_id`
+/// fields are eagerly parsed from the N-D `outdb_uri` (which carries a
+/// `#band=N` fragment in the SedonaDB convention) so callers from the
+/// pre-N-D era keep compiling against the same field names.
+#[derive(Debug, Clone)]
+pub struct BandMetadata {
+ pub nodata_value: Option<Vec<u8>>,
+ pub storage_type: sedona_schema::raster::StorageType,
+ pub datatype: BandDataType,
+ pub outdb_url: Option<String>,
+ pub outdb_band_id: Option<u32>,
+}
+
+impl BandMetadata {
+ pub fn nodata_value(&self) -> Option<&[u8]> {
+ self.nodata_value.as_deref()
+ }
+ /// Returns the storage type. Wrapped in `Result` to match main's
+ /// `BandMetadataRef::storage_type()` signature — our shim
+ /// implementation never errors, but the signature is preserved so
+ /// existing `matches!(band.metadata().storage_type(), Ok(...))`
+ /// patterns from before the N-D refactor keep compiling.
+ pub fn storage_type(&self) -> Result<sedona_schema::raster::StorageType, ArrowError> {
+ Ok(self.storage_type)
+ }
+ /// Returns the band data type. Wrapped in `Result` to match main's
+ /// `BandMetadataRef::data_type()` signature — see `storage_type()`.
+ pub fn data_type(&self) -> Result<BandDataType, ArrowError> {
+ Ok(self.datatype)
+ }
+ pub fn outdb_url(&self) -> Option<&str> {
+ self.outdb_url.as_deref()
+ }
+ pub fn outdb_band_id(&self) -> Option<u32> {
+ self.outdb_band_id
+ }
+ /// Nodata value interpreted as f64. Mirrors the pre-N-D
+ /// `BandMetadataRef::nodata_value_as_f64()`. Uses the lossless
+ /// conversion (errors on i64/u64 magnitudes > 2^53) so the shim
+ /// surface picks up the same correctness fix as
+ /// `BandRef::nodata_as_f64()`.
+ pub fn nodata_value_as_f64(&self) -> Result<Option<f64>, ArrowError> {
+ let bytes = match self.nodata_value.as_deref() {
+ Some(b) => b,
+ None => return Ok(None),
+ };
+ nodata_bytes_to_f64_lossless(bytes, &self.datatype).map(Some)
+ }
+}
+
+/// Parse the SedonaDB `#band=N` fragment out of an out-DB URI.
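+/// Returns `(base_url, band_id)`. If the text after the last `#` is not
+/// `band=<u32>` (or there is no `#` at all), the URI is returned unchanged
+/// and band_id defaults to 1.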
+/// Duplicated (intentionally — and minimally) from
+/// `sedona-raster-gdal::source_uri` because the shim lives in
+/// `sedona-raster` and can't reach across the crate boundary.
+fn split_outdb_band_fragment(uri: &str) -> (String, u32) {
+ if let Some(hash_pos) = uri.rfind('#') {
+ let (base, fragment) = uri.split_at(hash_pos);
+ let fragment = &fragment[1..]; // skip the '#'
+ if let Some(rest) = fragment.strip_prefix("band=") {
+ if let Ok(n) = rest.parse::<u32>() {
+ return (base.to_string(), n);
+ }
+ }
+ }
+ (uri.to_string(), 1)
+}
+
+/// Iteration view over a raster's bands. Returned by `RasterRef::bands()`.
+///
+/// Wraps a borrowed `&dyn RasterRef` and offers the `len()` / `band(1-based)`
+/// / `iter()` shape that callers used before the N-D refactor. New code can
+/// equivalently use `RasterRef::num_bands()` and `RasterRef::band(0-based)`
+/// directly; both call patterns coexist.
+pub struct Bands<'a> {
+ raster: &'a dyn RasterRef,
+}
+
+impl<'a> Bands<'a> {
+ /// Wrap a `&dyn RasterRef` for the legacy 1-based band-access surface.
+ pub fn new(raster: &'a dyn RasterRef) -> Self {
+ Self { raster }
+ }
+}
+
+impl<'a> Bands<'a> {
+ /// Number of bands in the raster.
+ pub fn len(&self) -> usize {
+ self.raster.num_bands()
+ }
+
+ /// True iff the raster has zero bands.
+ pub fn is_empty(&self) -> bool {
self.len() == 0
}
- /// Get a specific band by number (returns Error if out of bounds)
- /// By convention, band numbers are 1-based
- fn band(&self, number: usize) -> Result<Box<dyn BandRef + '_>, ArrowError>;
- /// Iterator over all bands
- fn iter(&self) -> Box<dyn BandIterator<'_> + '_>;
+
+ /// Look up a band by **1-based** number. Returns an error rather than
+ /// `None` so callers can use `?`. For 0-based access, use
+ /// `RasterRef::band` directly.
+ pub fn band(&self, number: usize) -> Result<Box<dyn BandRef + 'a>, ArrowError> {
+ if number == 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid band number {number}: band numbers must be 1-based"
+ )));
+ }
+ self.raster.band(number - 1).ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!(
+ "Band number {} is out of range: this raster has {} bands",
+ number,
+ self.raster.num_bands()
+ ))
+ })
+ }
+
+ /// Iterate over the bands in 0-based order. Any index for which
+ /// `RasterRef::band` returns `None` is silently skipped.
+ pub fn iter(&self) -> impl Iterator<Item = Box<dyn BandRef + 'a>> + 'a {
+ let raster = self.raster;
+ (0..raster.num_bands()).filter_map(move |i| raster.band(i))
+ }
}
-/// Trait for accessing individual band data
+/// Trait for accessing an N-dimensional raster (top level).
+///
+/// Replaces the legacy `RasterRef` + `MetadataRef` + `BandsRef` hierarchy with
+/// a single flat interface. Bands are 0-indexed.
+pub trait RasterRef {
+ /// Number of bands/variables
+ fn num_bands(&self) -> usize;
+
+ /// Access a band by 0-based index
+ fn band(&self, index: usize) -> Option<Box<dyn BandRef + '_>>;
+
+ /// 1-based band-access view used by callers from before the N-D
+ /// refactor. Implementers typically write `Bands::new(self)`.
+ fn bands(&self) -> Bands<'_>;
+
+ /// Band name (e.g., Zarr variable name). None for unnamed bands.
+ fn band_name(&self, index: usize) -> Option<&str>;
+
+ /// Fast path for band data type — reads the scalar `data_type` column
+ /// without materialising a full `BandRef`. UDFs that only need this
+ /// metadata field should prefer this over `band(i)?.data_type()`.
+ /// Returns None if `index` is out of range or the discriminant is invalid.
+ ///
+ /// The default implementation delegates to `band(i)`. Backends with a
+ /// flat columnar layout should override for the no-allocation fast path.
+ fn band_data_type(&self, index: usize) -> Option<BandDataType> {
+ self.band(index).map(|b| b.data_type())
+ }
+
+ /// Fast path for band outdb URI — reads the `outdb_uri` column without
+ /// materialising a `BandRef`. Returns None if the band has no URI or
+ /// if `index` is out of range.
+ ///
+ /// The default implementation always returns None: going through
+ /// `band(i)` would allocate a `Box<dyn BandRef>`, and the returned
+ /// borrow can't outlive that boxed band. The raster-array backend
+ /// overrides it to read the column directly.
+ fn band_outdb_uri(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band outdb format — reads the `outdb_format` column
+ /// without materialising a `BandRef`. Default returns None for the
+ /// same lifetime reason as `band_outdb_uri`.
+ fn band_outdb_format(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band nodata bytes — reads the `nodata` column without
+ /// materialising a `BandRef`. Default returns None for the same
+ /// lifetime reason as `band_outdb_uri`.
+ fn band_nodata(&self, index: usize) -> Option<&[u8]> {
+ let _ = index;
+ None
+ }
+
+ /// CRS string (PROJJSON, WKT, or authority code). None if not set.
+ fn crs(&self) -> Option<&str>;
+
+ /// 6-element affine transform in GDAL GeoTransform order:
+ /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]`
+ fn transform(&self) -> &[f64];
+
+ /// Eagerly-computed concrete metadata view (width, height, geotransform
+ /// scalars). Mirrors the pre-N-D `RasterRef::metadata()` accessor.
+ ///
+ /// Panics if `spatial_shape` lacks width/height or `transform` is the
+ /// wrong length — those are corrupt-schema cases that error cleanly
+ /// through the `width()`/`height()` trait methods, but the metadata
+ /// accessor predates that contract and is kept infallible for caller
+ /// ergonomics.
+ fn metadata(&self) -> RasterMetadata {
+ let width = self
+ .width()
+ .expect("raster has no width (spatial_shape missing); use width()? for error handling");
+ let height = self
+ .height()
+ .expect("raster has no height; use height()? for error handling");
+ let t = self.transform();
+ if t.len() != 6 {
+ panic!("transform must be 6 elements, got {}", t.len());
+ }
+ RasterMetadata {
+ width,
+ height,
+ upperleft_x: t[0],
+ scale_x: t[1],
+ skew_x: t[2],
+ upperleft_y: t[3],
+ skew_y: t[4],
+ scale_y: t[5],
+ }
+ }
+
+ /// Spatial dimension names, in order (today `["x","y"]`; a future Z phase
+ /// would extend to `["x","y","z"]`). Every band must contain each of these
+ /// names in its own `dim_names`, with matching sizes.
+ fn spatial_dims(&self) -> Vec<&str>;
+
+ /// Spatial dimension sizes, in the same order as `spatial_dims`. Today
+ /// `[width, height]`.
+ fn spatial_shape(&self) -> &[i64];
+
+ /// Name of the X spatial dimension (e.g., "x", "lon", "easting").
+ fn x_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().next().unwrap_or("x")
+ }
+
+ /// Name of the Y spatial dimension (e.g., "y", "lat", "northing").
+ fn y_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().nth(1).unwrap_or("y")
+ }
+
+ /// Width in pixels — size of the X spatial dimension from the top-level
+ /// `spatial_shape`. Errors if `spatial_shape` is empty or the X size is
+ /// negative; both are invariant violations rather than legitimate "no
+ /// value" states.
+ fn width(&self) -> Result<u64, ArrowError> {
+ let shape = self.spatial_shape();
+ let Some(&v) = shape.first() else {
+ return Err(ArrowError::InvalidArgumentError(
+ "raster has no width (spatial_shape is empty)".to_string(),
+ ));
+ };
+ if v < 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster width must be non-negative, got {v}"
+ )));
+ }
+ Ok(v as u64)
+ }
+
+ /// Height in pixels — size of the Y spatial dimension from the top-level
+ /// `spatial_shape`. Errors if `spatial_shape` has fewer than two entries
+ /// or the Y size is negative.
+ fn height(&self) -> Result<u64, ArrowError> {
+ let shape = self.spatial_shape();
+ let Some(&v) = shape.get(1) else {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster has no height (spatial_shape has {} entries, need >= 2)",
+ shape.len()
+ )));
+ };
+ if v < 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster height must be non-negative, got {v}"
+ )));
+ }
+ Ok(v as u64)
+ }
+
+ /// Look up a band by name. Returns None if no band has that name.
+ fn band_by_name(&self, name: &str) -> Option<Box<dyn BandRef + '_>> {
+ (0..self.num_bands())
+ .find(|&i| self.band_name(i) == Some(name))
+ .and_then(|i| self.band(i))
+ }
+}
+
+/// Trait for accessing a single band/variable within an N-D raster.
+///
+/// This is the consumer interface. Implementations handle storage details.
+/// Two data access paths:
+/// - `contiguous_data()` — flat row-major bytes for consumers that don't need
+/// stride awareness (most RS_* functions, GDAL boundary, serialization).
+/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware
+/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies.
pub trait BandRef {
- /// Band metadata accessor
- fn metadata(&self) -> &dyn BandMetadataRef;
- /// Raw band data as bytes (zero-copy access)
- fn data(&self) -> &[u8];
-}
-
-/// Trait for accessing individual band metadata
-pub trait BandMetadataRef {
- /// No-data value as raw bytes (None if null)
- fn nodata_value(&self) -> Option<&[u8]>;
- /// Storage type (InDb, OutDbRef, etc)
- fn storage_type(&self) -> Result<StorageType, ArrowError>;
- /// Band data type (UInt8, Float32, etc.)
- fn data_type(&self) -> Result<BandDataType, ArrowError>;
- /// OutDb URL (only used when storage_type == OutDbRef)
- fn outdb_url(&self) -> Option<&str>;
- /// OutDb band ID (only used when storage_type == OutDbRef)
- fn outdb_band_id(&self) -> Option<u32>;
-
- /// No-data value interpreted as f64.
+ // -- Dimension metadata --
+
+ /// Number of dimensions in this band
+ fn ndim(&self) -> usize;
+
+ /// Dimension names in order (e.g., `["time", "y", "x"]`)
+ fn dim_names(&self) -> Vec<&str>;
+
+ /// Visible shape — size of each dimension in the band's view, in
+ /// `dim_names` order. Derived from `view`: `[v.steps for v in view]`.
+ /// This is what almost all consumers want; use `raw_source_shape()` only
+ /// when you need to address into the raw `data` buffer (e.g. FFI).
+ fn shape(&self) -> &[u64];
+
+ /// **Internal/FFI-only.** Natural C-order extent of the band's
+ /// underlying `data` buffer, indexed by *source* axis (not visible
+ /// axis). Almost every consumer wants `shape()` instead — that is the
+ /// region the band exposes, and is what you compare against
+ /// `spatial_shape`, iterate over for pixels, and compose further views
+ /// against. The two only agree when the band's view is the identity;
+ /// any slice, broadcast, or permutation makes them diverge.
+ ///
+ /// Use this only when you need to index directly into the raw `data`
+ /// bytes (e.g. Arrow C Data Interface, numpy zero-copy views) and you
+ /// also handle `view()` and the byte-stride layout from `nd_buffer()`.
+ fn raw_source_shape(&self) -> &[u64];
+
+ /// Per-visible-dimension view entries describing how the band's
+ /// visible axes map onto its `source_shape`. `view().len() == ndim()`.
+ /// See `ViewEntry` for per-entry semantics.
+ fn view(&self) -> &[ViewEntry];
+
+ /// Size of a named dimension (None if doesn't exist)
+ fn dim_size(&self, name: &str) -> Option<u64> {
+ let idx = self.dim_index(name)?;
+ Some(self.shape()[idx])
+ }
+
+ /// Index of a named dimension (None if doesn't exist)
+ fn dim_index(&self, name: &str) -> Option<usize> {
+ self.dim_names().iter().position(|n| *n == name)
+ }
+
+ /// True iff this band is shaped exactly like a legacy 2-D raster band:
+ /// `dim_names == ["y", "x"]` and the view is the identity over the
+ /// band's `raw_source_shape` (no slice, no broadcast, no permutation).
+ ///
+ /// GDAL-backed SQL functions use this to refuse N-D bands cleanly while
+ /// they wait for an MDArray-aware port.
+ fn is_2d(&self) -> bool {
+ let dims = self.dim_names();
+ if dims.len() != 2 || dims[0] != "y" || dims[1] != "x" {
+ return false;
+ }
+ let view = self.view();
+ let source_shape = self.raw_source_shape();
+ if view.len() != 2 || source_shape.len() != 2 {
+ return false;
+ }
+ view.iter().enumerate().all(|(i, v)| {
+ v.source_axis as usize == i
+ && v.start == 0
+ && v.step == 1
+ && v.steps >= 0
+ && v.steps as u64 == source_shape[i]
+ })
+ }
+
+ // -- Band metadata --
+
+ /// Data type for all elements in this band
+ fn data_type(&self) -> BandDataType;
+
+ /// Nodata value as raw bytes (None if not set)
+ fn nodata(&self) -> Option<&[u8]>;
+
+ /// OutDb URI — location of the external resource (e.g.
+ /// `"s3://bucket/file.tif"`, `"file:///…"`, `"mem://…"`). None for
+ /// in-memory bands. Scheme resolution is delegated to an
+ /// `ObjectStoreRegistry`; it does *not* imply a format.
+ fn outdb_uri(&self) -> Option<&str> {
+ None
+ }
+
+ /// OutDb format — how to interpret the bytes at `outdb_uri`
+ /// (e.g. `"geotiff"`, `"zarr"`). None means in-memory — the band's
+ /// `contiguous_data()` / `nd_buffer()` is authoritative.
+ fn outdb_format(&self) -> Option<&str> {
+ None
+ }
+
+ /// True if this band's bytes live in the `data` buffer (in-database).
+ /// False if the bytes must be fetched from `outdb_uri` (out-of-database).
+ ///
+ /// The discriminator is whether the `data` buffer is non-empty —
+ /// `outdb_uri` and `outdb_format` are orthogonal location/format hints
+ /// that may be set on either kind of band.
+ fn is_indb(&self) -> bool {
+ // Default: materialize via nd_buffer and check buffer emptiness.
+ // Concrete impls should override with a direct buffer check.
+ self.nd_buffer().is_ok_and(|b| !b.buffer.is_empty())
+ }
+
+ /// Eagerly-computed concrete band metadata. Mirrors the pre-N-D
+ /// `BandRef::metadata()` accessor.
+ ///
+ /// `outdb_url` and `outdb_band_id` are parsed from `outdb_uri()`'s
+ /// SedonaDB `#band=N` fragment convention so callers that pattern-match
+ /// on those fields keep compiling.
+ fn metadata(&self) -> BandMetadata {
+ let is_indb = self.is_indb();
+ // Match the pre-N-D contract: outdb_url / outdb_band_id are only
+ // populated when storage_type is OutDbRef. PR-B's schema lets the
+ // URI hint coexist with InDb data; this surface hides that.
+ let (outdb_url, outdb_band_id) = if !is_indb {
+ match self.outdb_uri() {
+ Some(uri) => {
+ let (base, band) = split_outdb_band_fragment(uri);
+ (Some(base), Some(band))
+ }
+ None => (None, None),
+ }
+ } else {
+ (None, None)
+ };
+ BandMetadata {
+ nodata_value: self.nodata().map(|b| b.to_vec()),
+ storage_type: if is_indb {
+ sedona_schema::raster::StorageType::InDb
+ } else {
+ sedona_schema::raster::StorageType::OutDbRef
+ },
+ datatype: self.data_type(),
+ outdb_url,
+ outdb_band_id,
+ }
+ }
+
+ // -- Data access --
+
+ /// Raw backing buffer + visible-region layout. Triggers load for lazy
+ /// impls. The returned `NdBuffer` describes the band's view in
+ /// byte-stride terms — `shape` is the visible shape, `strides` and
+ /// `offset` are computed by composing the view with the source's
+ /// natural C-order byte strides. Strides may be zero (broadcast) or
+ /// negative (reverse iteration).
+ fn nd_buffer(&self) -> Result<NdBuffer<'_>, ArrowError>;
+
+ /// Contiguous row-major bytes covering the *visible* region. Zero-copy
+ /// (`Cow::Borrowed`) when the view is full identity over a C-order
+ /// source buffer; copies into a new buffer when the view slices,
+ /// broadcasts, or permutes. Most RS_* functions use this.
+ fn contiguous_data(&self) -> Result<Cow<'_, [u8]>, ArrowError>;
+
+ /// Pre-N-D compatibility shim: raw row-major bytes for InDb,
+ /// identity-view bands. Panics on anything else (OutDb, non-identity
+ /// view, or a `contiguous_data` error) — corresponds to main's
+ /// infallible `BandRef::data() -> &[u8]` which only ever ran against
+ /// identity-view InDb bands.
+ fn data(&self) -> &[u8] {
+ // Default impl forwards through nd_buffer's borrowed slice. This
+ // only borrows the underlying band buffer for identity-view InDb
+ // bands; everything else is a corrupt-shape call site.
+ self.nd_buffer()
+ .expect("BandRef::data() requires an in-db band with bytes")
+ .buffer
Review Comment:
Should this also check for a non-identity view and panic?
##########
rust/sedona-raster/src/traits.rs:
##########
@@ -73,62 +108,550 @@ pub trait MetadataRef {
/// Y-direction skew/rotation
fn skew_y(&self) -> f64;
}
-/// Trait for accessing all bands in a raster
-pub trait BandsRef {
- /// Number of bands in the raster
- fn len(&self) -> usize;
- /// Check if no bands are present
- fn is_empty(&self) -> bool {
+
+impl MetadataRef for RasterMetadata {
+ fn width(&self) -> u64 {
+ self.width
+ }
+ fn height(&self) -> u64 {
+ self.height
+ }
+ fn upper_left_x(&self) -> f64 {
+ self.upperleft_x
+ }
+ fn upper_left_y(&self) -> f64 {
+ self.upperleft_y
+ }
+ fn scale_x(&self) -> f64 {
+ self.scale_x
+ }
+ fn scale_y(&self) -> f64 {
+ self.scale_y
+ }
+ fn skew_x(&self) -> f64 {
+ self.skew_x
+ }
+ fn skew_y(&self) -> f64 {
+ self.skew_y
+ }
+}
+
+impl RasterMetadata {
+ pub fn width(&self) -> u64 {
+ self.width
+ }
+ pub fn height(&self) -> u64 {
+ self.height
+ }
+ pub fn upper_left_x(&self) -> f64 {
+ self.upperleft_x
+ }
+ pub fn upper_left_y(&self) -> f64 {
+ self.upperleft_y
+ }
+ pub fn scale_x(&self) -> f64 {
+ self.scale_x
+ }
+ pub fn scale_y(&self) -> f64 {
+ self.scale_y
+ }
+ pub fn skew_x(&self) -> f64 {
+ self.skew_x
+ }
+ pub fn skew_y(&self) -> f64 {
+ self.skew_y
+ }
+}
+
+/// Concrete band metadata returned by `BandRef::metadata()`.
+///
+/// Restored from the pre-N-D schema. The `outdb_url` and `outdb_band_id`
+/// fields are eagerly parsed from the N-D `outdb_uri` (which carries a
+/// `#band=N` fragment in the SedonaDB convention) so callers from the
+/// pre-N-D era keep compiling against the same field names.
+#[derive(Debug, Clone)]
+pub struct BandMetadata {
+ pub nodata_value: Option<Vec<u8>>,
+ pub storage_type: sedona_schema::raster::StorageType,
+ pub datatype: BandDataType,
+ pub outdb_url: Option<String>,
+ pub outdb_band_id: Option<u32>,
+}
+
+impl BandMetadata {
+ pub fn nodata_value(&self) -> Option<&[u8]> {
+ self.nodata_value.as_deref()
+ }
+ /// Returns the storage type. Wrapped in `Result` to match main's
+ /// `BandMetadataRef::storage_type()` signature — our shim
+ /// implementation never errors, but the signature is preserved so
+ /// existing `matches!(band.metadata().storage_type(), Ok(...))`
+ /// patterns from before the N-D refactor keep compiling.
+ pub fn storage_type(&self) -> Result<sedona_schema::raster::StorageType, ArrowError> {
+ Ok(self.storage_type)
+ }
+ /// Returns the band data type. Wrapped in `Result` to match main's
+ /// `BandMetadataRef::data_type()` signature — see `storage_type()`.
+ pub fn data_type(&self) -> Result<BandDataType, ArrowError> {
+ Ok(self.datatype)
+ }
+ pub fn outdb_url(&self) -> Option<&str> {
+ self.outdb_url.as_deref()
+ }
+ pub fn outdb_band_id(&self) -> Option<u32> {
+ self.outdb_band_id
+ }
+ /// Nodata value interpreted as f64. Mirrors the pre-N-D
+ /// `BandMetadataRef::nodata_value_as_f64()`. Uses the lossless
+ /// conversion (errors on i64/u64 magnitudes > 2^53) so the shim
+ /// surface picks up the same correctness fix as
+ /// `BandRef::nodata_as_f64()`.
+ pub fn nodata_value_as_f64(&self) -> Result<Option<f64>, ArrowError> {
+ let bytes = match self.nodata_value.as_deref() {
+ Some(b) => b,
+ None => return Ok(None),
+ };
+ nodata_bytes_to_f64_lossless(bytes, &self.datatype).map(Some)
+ }
+}
+
+/// Parse the SedonaDB `#band=N` fragment out of an out-DB URI.
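+/// Returns `(base_url, band_id)`. If the text after the last `#` is not
+/// `band=<u32>` (or there is no `#` at all), the URI is returned unchanged
+/// and band_id defaults to 1.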
+/// Duplicated (intentionally — and minimally) from
+/// `sedona-raster-gdal::source_uri` because the shim lives in
+/// `sedona-raster` and can't reach across the crate boundary.
+fn split_outdb_band_fragment(uri: &str) -> (String, u32) {
+ if let Some(hash_pos) = uri.rfind('#') {
+ let (base, fragment) = uri.split_at(hash_pos);
+ let fragment = &fragment[1..]; // skip the '#'
+ if let Some(rest) = fragment.strip_prefix("band=") {
+ if let Ok(n) = rest.parse::<u32>() {
+ return (base.to_string(), n);
+ }
+ }
+ }
+ (uri.to_string(), 1)
+}
+
+/// Iteration view over a raster's bands. Returned by `RasterRef::bands()`.
+///
+/// Wraps a borrowed `&dyn RasterRef` and offers the `len()` / `band(1-based)`
+/// / `iter()` shape that callers used before the N-D refactor. New code can
+/// equivalently use `RasterRef::num_bands()` and `RasterRef::band(0-based)`
+/// directly; both call patterns coexist.
+pub struct Bands<'a> {
+ raster: &'a dyn RasterRef,
+}
+
+impl<'a> Bands<'a> {
+ /// Wrap a `&dyn RasterRef` for the legacy 1-based band-access surface.
+ pub fn new(raster: &'a dyn RasterRef) -> Self {
+ Self { raster }
+ }
+}
+
+impl<'a> Bands<'a> {
+ /// Number of bands in the raster.
+ pub fn len(&self) -> usize {
+ self.raster.num_bands()
+ }
+
+ /// True iff the raster has zero bands.
+ pub fn is_empty(&self) -> bool {
self.len() == 0
}
- /// Get a specific band by number (returns Error if out of bounds)
- /// By convention, band numbers are 1-based
- fn band(&self, number: usize) -> Result<Box<dyn BandRef + '_>, ArrowError>;
- /// Iterator over all bands
- fn iter(&self) -> Box<dyn BandIterator<'_> + '_>;
+
+ /// Look up a band by **1-based** number. Returns an error rather than
+ /// `None` so callers can use `?`. For 0-based access, use
+ /// `RasterRef::band` directly.
+ pub fn band(&self, number: usize) -> Result<Box<dyn BandRef + 'a>, ArrowError> {
+ if number == 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid band number {number}: band numbers must be 1-based"
+ )));
+ }
+ self.raster.band(number - 1).ok_or_else(|| {
+ ArrowError::InvalidArgumentError(format!(
+ "Band number {} is out of range: this raster has {} bands",
+ number,
+ self.raster.num_bands()
+ ))
+ })
+ }
+
+ /// Iterate over the bands in 0-based order. Any index for which
+ /// `RasterRef::band` returns `None` is silently skipped.
+ pub fn iter(&self) -> impl Iterator<Item = Box<dyn BandRef + 'a>> + 'a {
+ let raster = self.raster;
+ (0..raster.num_bands()).filter_map(move |i| raster.band(i))
+ }
}
-/// Trait for accessing individual band data
+/// Trait for accessing an N-dimensional raster (top level).
+///
+/// Replaces the legacy `RasterRef` + `MetadataRef` + `BandsRef` hierarchy with
+/// a single flat interface. Bands are 0-indexed.
+pub trait RasterRef {
+ /// Number of bands/variables
+ fn num_bands(&self) -> usize;
+
+ /// Access a band by 0-based index
+ fn band(&self, index: usize) -> Option<Box<dyn BandRef + '_>>;
+
+ /// 1-based band-access view used by callers from before the N-D
+ /// refactor. Implementers typically write `Bands::new(self)`.
+ fn bands(&self) -> Bands<'_>;
+
+ /// Band name (e.g., Zarr variable name). None for unnamed bands.
+ fn band_name(&self, index: usize) -> Option<&str>;
+
+ /// Fast path for band data type — reads the scalar `data_type` column
+ /// without materialising a full `BandRef`. UDFs that only need this
+ /// metadata field should prefer this over `band(i)?.data_type()`.
+ /// Returns None if `index` is out of range or the discriminant is invalid.
+ ///
+ /// The default implementation delegates to `band(i)`. Backends with a
+ /// flat columnar layout should override for the no-allocation fast path.
+ fn band_data_type(&self, index: usize) -> Option<BandDataType> {
+ self.band(index).map(|b| b.data_type())
+ }
+
+ /// Fast path for band outdb URI — reads the `outdb_uri` column without
+ /// materialising a `BandRef`. Returns None if the band has no URI or
+ /// if `index` is out of range.
+ ///
+ /// The default implementation always returns None: going through
+ /// `band(i)` would allocate a `Box<dyn BandRef>`, and the returned
+ /// borrow can't outlive that boxed band. The raster-array backend
+ /// overrides it to read the column directly.
+ fn band_outdb_uri(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band outdb format — reads the `outdb_format` column
+ /// without materialising a `BandRef`. Default returns None for the
+ /// same lifetime reason as `band_outdb_uri`.
+ fn band_outdb_format(&self, index: usize) -> Option<&str> {
+ let _ = index;
+ None
+ }
+
+ /// Fast path for band nodata bytes — reads the `nodata` column without
+ /// materialising a `BandRef`. Default returns None for the same
+ /// lifetime reason as `band_outdb_uri`.
+ fn band_nodata(&self, index: usize) -> Option<&[u8]> {
+ let _ = index;
+ None
+ }
+
+ /// CRS string (PROJJSON, WKT, or authority code). None if not set.
+ fn crs(&self) -> Option<&str>;
+
+ /// 6-element affine transform in GDAL GeoTransform order:
+ /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]`
+ fn transform(&self) -> &[f64];
+
+ /// Eagerly-computed concrete metadata view (width, height, geotransform
+ /// scalars). Mirrors the pre-N-D `RasterRef::metadata()` accessor.
+ ///
+ /// Panics if `spatial_shape` lacks width/height or `transform` is the
+ /// wrong length — those are corrupt-schema cases that error cleanly
+ /// through the `width()`/`height()` trait methods, but the metadata
+ /// accessor predates that contract and is kept infallible for caller
+ /// ergonomics.
+ fn metadata(&self) -> RasterMetadata {
+ let width = self
+ .width()
+ .expect("raster has no width (spatial_shape missing); use width()? for error handling");
+ let height = self
+ .height()
+ .expect("raster has no height; use height()? for error handling");
+ let t = self.transform();
+ if t.len() != 6 {
+ panic!("transform must be 6 elements, got {}", t.len());
+ }
+ RasterMetadata {
+ width,
+ height,
+ upperleft_x: t[0],
+ scale_x: t[1],
+ skew_x: t[2],
+ upperleft_y: t[3],
+ skew_y: t[4],
+ scale_y: t[5],
+ }
+ }
+
+ /// Spatial dimension names, in order (today `["x","y"]`; a future Z phase
+ /// would extend to `["x","y","z"]`). Every band must contain each of these
+ /// names in its own `dim_names`, with matching sizes.
+ fn spatial_dims(&self) -> Vec<&str>;
+
+ /// Spatial dimension sizes, in the same order as `spatial_dims`. Today
+ /// `[width, height]`.
+ fn spatial_shape(&self) -> &[i64];
+
+ /// Name of the X spatial dimension (e.g., "x", "lon", "easting").
+ fn x_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().next().unwrap_or("x")
+ }
+
+ /// Name of the Y spatial dimension (e.g., "y", "lat", "northing").
+ fn y_dim(&self) -> &str {
+ let dims = self.spatial_dims();
+ dims.into_iter().nth(1).unwrap_or("y")
+ }
+
+ /// Width in pixels — size of the X spatial dimension from the top-level
+ /// `spatial_shape`. Errors if `spatial_shape` is empty or the X size is
+ /// negative; both are invariant violations rather than legitimate "no
+ /// value" states.
+ fn width(&self) -> Result<u64, ArrowError> {
+ let shape = self.spatial_shape();
+ let Some(&v) = shape.first() else {
+ return Err(ArrowError::InvalidArgumentError(
+ "raster has no width (spatial_shape is empty)".to_string(),
+ ));
+ };
+ if v < 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster width must be non-negative, got {v}"
+ )));
+ }
+ Ok(v as u64)
+ }
+
+ /// Height in pixels — size of the Y spatial dimension from the top-level
+ /// `spatial_shape`. Errors if `spatial_shape` has fewer than two entries
+ /// or the Y size is negative.
+ fn height(&self) -> Result<u64, ArrowError> {
+ let shape = self.spatial_shape();
+ let Some(&v) = shape.get(1) else {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster has no height (spatial_shape has {} entries, need >= 2)",
+ shape.len()
+ )));
+ };
+ if v < 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "raster height must be non-negative, got {v}"
+ )));
+ }
+ Ok(v as u64)
+ }
+
+ /// Look up a band by name. Returns None if no band has that name.
+ fn band_by_name(&self, name: &str) -> Option<Box<dyn BandRef + '_>> {
+ (0..self.num_bands())
+ .find(|&i| self.band_name(i) == Some(name))
+ .and_then(|i| self.band(i))
+ }
+}
+
+/// Trait for accessing a single band/variable within an N-D raster.
+///
+/// This is the consumer interface. Implementations handle storage details.
+/// Two data access paths:
+/// - `contiguous_data()` — flat row-major bytes for consumers that don't need
+/// stride awareness (most RS_* functions, GDAL boundary, serialization).
+/// - `nd_buffer()` — raw buffer + shape + strides + offset for stride-aware
+/// consumers (numpy zero-copy views, Arrow FFI) that want to avoid copies.
pub trait BandRef {
- /// Band metadata accessor
- fn metadata(&self) -> &dyn BandMetadataRef;
- /// Raw band data as bytes (zero-copy access)
- fn data(&self) -> &[u8];
-}
-
-/// Trait for accessing individual band metadata
-pub trait BandMetadataRef {
- /// No-data value as raw bytes (None if null)
- fn nodata_value(&self) -> Option<&[u8]>;
- /// Storage type (InDb, OutDbRef, etc)
- fn storage_type(&self) -> Result<StorageType, ArrowError>;
- /// Band data type (UInt8, Float32, etc.)
- fn data_type(&self) -> Result<BandDataType, ArrowError>;
- /// OutDb URL (only used when storage_type == OutDbRef)
- fn outdb_url(&self) -> Option<&str>;
- /// OutDb band ID (only used when storage_type == OutDbRef)
- fn outdb_band_id(&self) -> Option<u32>;
-
- /// No-data value interpreted as f64.
+ // -- Dimension metadata --
+
+ /// Number of dimensions in this band
+ fn ndim(&self) -> usize;
+
+ /// Dimension names in order (e.g., `["time", "y", "x"]`)
+ fn dim_names(&self) -> Vec<&str>;
+
+ /// Visible shape — size of each dimension in the band's view, in
+ /// `dim_names` order. Derived from `view`: `[v.steps for v in view]`.
+ /// This is what almost all consumers want; use `raw_source_shape()` only
+ /// when you need to address into the raw `data` buffer (e.g. FFI).
+ fn shape(&self) -> &[u64];
+
+ /// **Internal/FFI-only.** Natural C-order extent of the band's
+ /// underlying `data` buffer, indexed by *source* axis (not visible
+ /// axis). Almost every consumer wants `shape()` instead — that is the
+ /// region the band exposes, and is what you compare against
+ /// `spatial_shape`, iterate over for pixels, and compose further views
+ /// against. The two only agree when the band's view is the identity;
+ /// any slice, broadcast, or permutation makes them diverge.
+ ///
+ /// Use this only when you need to index directly into the raw `data`
+ /// bytes (e.g. Arrow C Data Interface, numpy zero-copy views) and you
+ /// also handle `view()` and the byte-stride layout from `nd_buffer()`.
+ fn raw_source_shape(&self) -> &[u64];
+
+ /// Per-visible-dimension view entries describing how the band's
+ /// visible axes map onto its `source_shape`. `view().len() == ndim()`.
+ /// See `ViewEntry` for per-entry semantics.
+ fn view(&self) -> &[ViewEntry];
+
+ /// Size of a named dimension (None if doesn't exist)
+ fn dim_size(&self, name: &str) -> Option<u64> {
+ let idx = self.dim_index(name)?;
+ Some(self.shape()[idx])
+ }
+
+ /// Index of a named dimension (None if doesn't exist)
+ fn dim_index(&self, name: &str) -> Option<usize> {
+ self.dim_names().iter().position(|n| *n == name)
+ }
+
+ /// True iff this band is shaped exactly like a legacy 2-D raster band:
+ /// `dim_names == ["y", "x"]` and the view is the identity over the
+ /// band's `raw_source_shape` (no slice, no broadcast, no permutation).
+ ///
+ /// GDAL-backed SQL functions use this to refuse N-D bands cleanly while
+ /// they wait for an MDArray-aware port.
+ fn is_2d(&self) -> bool {
+ let dims = self.dim_names();
+ if dims.len() != 2 || dims[0] != "y" || dims[1] != "x" {
+ return false;
+ }
+ let view = self.view();
+ let source_shape = self.raw_source_shape();
+ if view.len() != 2 || source_shape.len() != 2 {
+ return false;
+ }
+ view.iter().enumerate().all(|(i, v)| {
+ v.source_axis as usize == i
+ && v.start == 0
+ && v.step == 1
+ && v.steps >= 0
+ && v.steps as u64 == source_shape[i]
+ })
+ }
+
+ // -- Band metadata --
+
+ /// Data type for all elements in this band
+ fn data_type(&self) -> BandDataType;
+
+ /// Nodata value as raw bytes (None if not set)
+ fn nodata(&self) -> Option<&[u8]>;
+
+ /// OutDb URI — location of the external resource (e.g.
+ /// `"s3://bucket/file.tif"`, `"file:///…"`, `"mem://…"`). None for
+ /// in-memory bands. Scheme resolution is delegated to an
+ /// `ObjectStoreRegistry`; it does *not* imply a format.
+ fn outdb_uri(&self) -> Option<&str> {
+ None
+ }
+
+ /// OutDb format — how to interpret the bytes at `outdb_uri`
+ /// (e.g. `"geotiff"`, `"zarr"`). None means in-memory — the band's
+ /// `contiguous_data()` / `nd_buffer()` is authoritative.
+ fn outdb_format(&self) -> Option<&str> {
+ None
+ }
+
+ /// True if this band's bytes live in the `data` buffer (in-database).
+ /// False if the bytes must be fetched from `outdb_uri` (out-of-database).
+ ///
+ /// The discriminator is whether the `data` buffer is non-empty —
+ /// `outdb_uri` and `outdb_format` are orthogonal location/format hints
+ /// that may be set on either kind of band.
+ fn is_indb(&self) -> bool {
+ // Default: materialize via nd_buffer and check buffer emptiness.
+ // Concrete impls should override with a direct buffer check.
+ self.nd_buffer().is_ok_and(|b| !b.buffer.is_empty())
+ }
+
+ /// Eagerly-computed concrete band metadata. Mirrors the pre-N-D
+ /// `BandRef::metadata()` accessor.
+ ///
+ /// `outdb_url` and `outdb_band_id` are parsed from `outdb_uri()`'s
+ /// SedonaDB `#band=N` fragment convention so callers that pattern-match
+ /// on those fields keep compiling.
+ fn metadata(&self) -> BandMetadata {
+ let is_indb = self.is_indb();
+ // Match the pre-N-D contract: outdb_url / outdb_band_id are only
+ // populated when storage_type is OutDbRef. PR-B's schema lets the
+ // URI hint coexist with InDb data; this surface hides that.
+ let (outdb_url, outdb_band_id) = if !is_indb {
+ match self.outdb_uri() {
+ Some(uri) => {
+ let (base, band) = split_outdb_band_fragment(uri);
+ (Some(base), Some(band))
+ }
+ None => (None, None),
+ }
+ } else {
+ (None, None)
+ };
+ BandMetadata {
+ nodata_value: self.nodata().map(|b| b.to_vec()),
+ storage_type: if is_indb {
+ sedona_schema::raster::StorageType::InDb
+ } else {
+ sedona_schema::raster::StorageType::OutDbRef
+ },
+ datatype: self.data_type(),
+ outdb_url,
+ outdb_band_id,
+ }
+ }
+
+ // -- Data access --
+
+ /// Raw backing buffer + visible-region layout. Triggers load for lazy
+ /// impls. The returned `NdBuffer` describes the band's view in
+ /// byte-stride terms — `shape` is the visible shape, `strides` and
+ /// `offset` are computed by composing the view with the source's
+ /// natural C-order byte strides. Strides may be zero (broadcast) or
+ /// negative (reverse iteration).
+ fn nd_buffer(&self) -> Result<NdBuffer<'_>, ArrowError>;
+
+ /// Contiguous row-major bytes covering the *visible* region. Zero-copy
+ /// (`Cow::Borrowed`) when the view is full identity over a C-order
+ /// source buffer; copies into a new buffer when the view slices,
+ /// broadcasts, or permutes. Most RS_* functions use this.
+ fn contiguous_data(&self) -> Result<Cow<'_, [u8]>, ArrowError>;
+
+ /// Pre-N-D compatibility shim: raw row-major bytes for InDb,
+ /// identity-view bands. Panics on anything else (OutDb, non-identity
+ /// view, or a `contiguous_data` error) — corresponds to main's
+ /// infallible `BandRef::data() -> &[u8]` which only ever ran against
+ /// identity-view InDb bands.
+ fn data(&self) -> &[u8] {
+ // Default impl forwards through nd_buffer's borrowed slice. This
+ // only borrows the underlying band buffer for identity-view InDb
+ // bands; everything else is a corrupt-shape call site.
+ self.nd_buffer()
+ .expect("BandRef::data() requires an in-db band with bytes")
+ .buffer
+ }
+
+ /// Nodata value interpreted as f64.
///
/// Returns `Ok(None)` when no nodata value is defined, `Ok(Some(f64))` on
- /// success, or an error when the raw bytes have an unexpected length for
- /// the band's data type.
- fn nodata_value_as_f64(&self) -> Result<Option<f64>, ArrowError> {
- let bytes = match self.nodata_value() {
+ /// success, or an error when the raw bytes have an unexpected length **or**
+ /// when the nodata value cannot be represented exactly in `f64`.
+ ///
+ /// 64-bit integer bands (`Int64`, `UInt64`) error rather than silently
+ /// rounding when the magnitude exceeds 2^53 — values outside
+ /// `[-9_007_199_254_740_992, 9_007_199_254_740_992]` can't round-trip
+ /// through `f64` and a rounded sentinel can collide with a real pixel
+ /// value. Use `nodata()` directly to recover the exact bytes when full
+ /// integer precision matters (e.g. when nodata is the type's extreme
+ /// value like `0xFF…FF`).
+ fn nodata_as_f64(&self) -> Result<Option<f64>, ArrowError> {
+ let bytes = match self.nodata() {
Some(b) => b,
None => return Ok(None),
};
- let dt = self.data_type()?;
- nodata_bytes_to_f64(bytes, &dt).map(Some)
+ nodata_bytes_to_f64_lossless(bytes, &self.data_type()).map(Some)
}
}
/// Convert raw nodata bytes to f64 given a [`BandDataType`].
///
/// The bytes are expected to be in little-endian order and exactly match the
/// byte size of the data type.
-fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result<f64, ArrowError> {
+pub fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result<f64, ArrowError> {
Review Comment:
```suggestion
fn nodata_bytes_to_f64(bytes: &[u8], dt: &BandDataType) -> Result<f64, ArrowError> {
```
##########
rust/sedona-raster/src/traits.rs:
##########
@@ -217,4 +777,208 @@ mod tests {
let result = nodata_bytes_to_f64(&[1, 2, 3], &BandDataType::Float64);
assert!(result.is_err());
}
+
+ #[test]
+ fn test_nodata_as_f64_int64_loses_precision_above_2_pow_53() {
+ // Locks in the documented warning: nodata bytes for Int64 values
+ // beyond f64's 53-bit mantissa silently round on conversion.
+ // The expected f64 is hard-coded — deriving it via `as f64` would
+ // mean the test invokes the same primitive cast it claims to test.
+ let big = (1i64 << 53) + 1; // 2^53 + 1; not representable in f64
+ let bytes = big.to_le_bytes();
+ let val = nodata_bytes_to_f64(&bytes, &BandDataType::Int64).unwrap();
+ assert_eq!(val, 9007199254740992.0_f64);
+ assert_ne!(val as i64, big);
+ }
Review Comment:
```suggestion
```
##########
rust/sedona-raster/src/array.rs:
##########
@@ -15,445 +15,431 @@
// specific language governing permissions and limitations
// under the License.
+use std::borrow::Cow;
+
use arrow_array::{
- Array, BinaryArray, BinaryViewArray, Float64Array, ListArray, StringArray,
StringViewArray,
- StructArray, UInt32Array, UInt64Array,
+ Array, BinaryArray, BinaryViewArray, Float64Array, Int64Array, ListArray,
StringArray,
+ StringViewArray, StructArray, UInt32Array, UInt64Array,
};
use arrow_schema::ArrowError;
-use crate::traits::{
- BandIterator, BandMetadataRef, BandRef, BandsRef, MetadataRef,
RasterMetadata, RasterRef,
-};
-use sedona_schema::raster::{
- band_indices, band_metadata_indices, metadata_indices, raster_indices,
BandDataType,
- StorageType,
-};
+use crate::traits::{BandRef, Bands, NdBuffer, RasterRef, ViewEntry};
+use sedona_schema::raster::{band_indices, raster_indices, BandDataType};
-/// Implement MetadataRef for RasterMetadata to allow direct use with builder
-impl MetadataRef for RasterMetadata {
- fn width(&self) -> u64 {
- self.width
- }
- fn height(&self) -> u64 {
- self.height
- }
- fn upper_left_x(&self) -> f64 {
- self.upperleft_x
- }
- fn upper_left_y(&self) -> f64 {
- self.upperleft_y
- }
- fn scale_x(&self) -> f64 {
- self.scale_x
- }
- fn scale_y(&self) -> f64 {
- self.scale_y
- }
- fn skew_x(&self) -> f64 {
- self.skew_x
- }
- fn skew_y(&self) -> f64 {
- self.skew_y
- }
-}
-
-/// Implementation of MetadataRef for Arrow StructArray
-struct MetadataRefImpl<'a> {
- width_array: &'a UInt64Array,
- height_array: &'a UInt64Array,
- upper_left_x_array: &'a Float64Array,
- upper_left_y_array: &'a Float64Array,
- scale_x_array: &'a Float64Array,
- scale_y_array: &'a Float64Array,
- skew_x_array: &'a Float64Array,
- skew_y_array: &'a Float64Array,
- index: usize,
+/// Arrow-backed implementation of BandRef for a single band within a raster.
+///
+/// Today this handles only the canonical identity view: `view_entries` is
+/// synthesised from `source_shape`, `visible_shape == source_shape`,
+/// and `byte_strides` are plain C-order strides with `byte_offset = 0`.
+struct BandRefImpl<'a> {
+ dim_names_list: &'a ListArray,
+ dim_names_values: &'a StringArray,
+ source_shape_list: &'a ListArray,
+ source_shape_values: &'a UInt64Array,
+ nodata_array: &'a BinaryArray,
+ outdb_uri_array: &'a StringArray,
+ outdb_format_array: &'a StringViewArray,
+ data_array: &'a BinaryViewArray,
+ /// Absolute row index within the flattened bands arrays
+ band_row: usize,
+ /// Resolved at construction so accessors don't re-decode the discriminant.
+ data_type: BandDataType,
+ /// Per-visible-axis view, length = ndim. Always identity today.
+ view_entries: Vec<ViewEntry>,
+ /// Visible shape, length = ndim. Equals `source_shape` today.
+ visible_shape: Vec<u64>,
+ /// Byte strides per visible axis. C-order over `source_shape` today.
+ byte_strides: Vec<i64>,
+ /// Byte offset into `data` of the visible region's `[0,...,0]` element.
+ byte_offset: u64,
}
-impl<'a> MetadataRef for MetadataRefImpl<'a> {
- #[inline(always)]
- fn width(&self) -> u64 {
- self.width_array.value(self.index)
- }
-
- #[inline(always)]
- fn height(&self) -> u64 {
- self.height_array.value(self.index)
+impl<'a> BandRef for BandRefImpl<'a> {
+ fn ndim(&self) -> usize {
+ self.view_entries.len()
}
- #[inline(always)]
- fn upper_left_x(&self) -> f64 {
- self.upper_left_x_array.value(self.index)
+ fn dim_names(&self) -> Vec<&str> {
+ let start = self.dim_names_list.value_offsets()[self.band_row] as
usize;
+ let end = self.dim_names_list.value_offsets()[self.band_row + 1] as
usize;
+ (start..end)
+ .map(|i| self.dim_names_values.value(i))
+ .collect()
}
- #[inline(always)]
- fn upper_left_y(&self) -> f64 {
- self.upper_left_y_array.value(self.index)
+ fn shape(&self) -> &[u64] {
+ &self.visible_shape
}
- #[inline(always)]
- fn scale_x(&self) -> f64 {
- self.scale_x_array.value(self.index)
+ fn raw_source_shape(&self) -> &[u64] {
+ let start = self.source_shape_list.value_offsets()[self.band_row] as
usize;
+ let end = self.source_shape_list.value_offsets()[self.band_row + 1] as
usize;
+ &self.source_shape_values.values()[start..end]
}
- #[inline(always)]
- fn scale_y(&self) -> f64 {
- self.scale_y_array.value(self.index)
+ fn view(&self) -> &[ViewEntry] {
+ &self.view_entries
}
- #[inline(always)]
- fn skew_x(&self) -> f64 {
- self.skew_x_array.value(self.index)
+ fn data_type(&self) -> BandDataType {
+ self.data_type
}
- #[inline(always)]
- fn skew_y(&self) -> f64 {
- self.skew_y_array.value(self.index)
+ fn data(&self) -> &[u8] {
+ // Pre-N-D compatibility surface: returns the raw `data` column bytes
+ // verbatim. For InDb identity-view bands this is the row-major buffer
+ // callers from main expect. For OutDb it's `&[]` — same shape as
+ // main, which let callers see "no in-line bytes" without panicking.
+ self.data_array.value(self.band_row)
}
-}
-
-/// Implementation of BandMetadataRef for Arrow StructArray
-struct BandMetadataRefImpl<'a> {
- nodata_array: &'a BinaryArray,
- storage_type_array: &'a UInt32Array,
- datatype_array: &'a UInt32Array,
- outdb_url_array: &'a StringArray,
- outdb_band_id_array: &'a UInt32Array,
- band_index: usize,
-}
-impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> {
- fn nodata_value(&self) -> Option<&[u8]> {
- if self.nodata_array.is_null(self.band_index) {
+ fn nodata(&self) -> Option<&[u8]> {
+ if self.nodata_array.is_null(self.band_row) {
None
} else {
- Some(self.nodata_array.value(self.band_index))
+ Some(self.nodata_array.value(self.band_row))
}
}
- fn storage_type(&self) -> Result<StorageType, ArrowError> {
- let value = self.storage_type_array.value(self.band_index);
- let storage_type = match value {
- 0 => StorageType::InDb,
- 1 => StorageType::OutDbRef,
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Unknown storage type: {}",
- value
- )))
- }
- };
- Ok(storage_type)
- }
-
- fn data_type(&self) -> Result<BandDataType, ArrowError> {
- let value = self.datatype_array.value(self.band_index);
- let band_data_type = match value {
- 1 => BandDataType::UInt8,
- 2 => BandDataType::UInt16,
- 3 => BandDataType::Int16,
- 4 => BandDataType::UInt32,
- 5 => BandDataType::Int32,
- 6 => BandDataType::Float32,
- 7 => BandDataType::Float64,
- 8 => BandDataType::UInt64,
- 9 => BandDataType::Int64,
- 10 => BandDataType::Int8,
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Unknown band data type: {}",
- self.datatype_array.value(self.band_index)
- )))
- }
- };
- Ok(band_data_type)
- }
-
- fn outdb_url(&self) -> Option<&str> {
- if self.outdb_url_array.is_null(self.band_index) {
+ fn outdb_uri(&self) -> Option<&str> {
+ if self.outdb_uri_array.is_null(self.band_row) {
None
} else {
- Some(self.outdb_url_array.value(self.band_index))
+ Some(self.outdb_uri_array.value(self.band_row))
}
}
- fn outdb_band_id(&self) -> Option<u32> {
- if self.outdb_band_id_array.is_null(self.band_index) {
+ fn outdb_format(&self) -> Option<&str> {
+ if self.outdb_format_array.is_null(self.band_row) {
None
} else {
- Some(self.outdb_band_id_array.value(self.band_index))
+ Some(self.outdb_format_array.value(self.band_row))
}
}
-}
-/// Implementation of BandRef for accessing individual band data
-struct BandRefImpl<'a> {
- band_metadata: BandMetadataRefImpl<'a>,
- band_data: &'a [u8],
-}
+ fn is_indb(&self) -> bool {
+ !self.data_array.value(self.band_row).is_empty()
+ }
-impl<'a> BandRef for BandRefImpl<'a> {
- fn metadata(&self) -> &dyn BandMetadataRef {
- &self.band_metadata
+ fn nd_buffer(&self) -> Result<NdBuffer<'_>, ArrowError> {
+ if !self.is_indb() {
+ return Err(ArrowError::NotYetImplemented(
+ "OutDb byte access via nd_buffer() is not yet implemented; \
+ backend-specific OutDb resolvers are tracked separately"
+ .to_string(),
+ ));
+ }
+ // shape and strides are owned by NdBuffer (see its doc comment).
+ // Cloning here is cheap — both vecs are O(ndim), a handful of values.
+ Ok(NdBuffer {
+ buffer: self.data_array.value(self.band_row),
+ shape: self.visible_shape.clone(),
+ strides: self.byte_strides.clone(),
+ offset: self.byte_offset,
+ data_type: self.data_type,
+ })
}
- fn data(&self) -> &[u8] {
- self.band_data
+ fn contiguous_data(&self) -> Result<Cow<'_, [u8]>, ArrowError> {
+ if !self.is_indb() {
+ return Err(ArrowError::NotYetImplemented(
+ "OutDb byte access via contiguous_data() is not yet implemented; \
+ backend-specific OutDb resolvers are tracked separately"
+ .to_string(),
+ ));
+ }
+ // Identity-view only today, so the data buffer is already row-major
+ // over the visible region.
+ Ok(Cow::Borrowed(self.data_array.value(self.band_row)))
}
}
-/// Implementation of BandsRef for accessing all bands in a raster
-struct BandsRefImpl<'a> {
+/// Arrow-backed implementation of RasterRef for a single raster row.
+///
+/// Holds flat references to the underlying Arrow arrays so the impl does
+/// not borrow from a `RasterStructArray` wrapper. That keeps
+/// `RasterStructArray::get(&self, ...)` callable without a `&'a self`
+/// constraint, which would otherwise force callers to hoist the
+/// `RasterStructArray` into a `let` binding.
+pub struct RasterRefImpl<'a> {
+ crs_array: &'a StringViewArray,
+ transform_list: &'a ListArray,
+ transform_values: &'a Float64Array,
+ spatial_dims_list: &'a ListArray,
+ spatial_dims_values: &'a StringViewArray,
+ spatial_shape_list: &'a ListArray,
+ spatial_shape_values: &'a Int64Array,
bands_list: &'a ListArray,
- raster_index: usize,
- // Direct references to the metadata and data arrays
- nodata_array: &'a BinaryArray,
- storage_type_array: &'a UInt32Array,
- datatype_array: &'a UInt32Array,
- outdb_url_array: &'a StringArray,
- outdb_band_id_array: &'a UInt32Array,
+ band_name_array: &'a StringArray,
+ band_dim_names_list: &'a ListArray,
+ band_dim_names_values: &'a StringArray,
+ band_source_shape_list: &'a ListArray,
+ band_source_shape_values: &'a UInt64Array,
+ band_datatype_array: &'a UInt32Array,
+ band_nodata_array: &'a BinaryArray,
+ band_view_list: &'a ListArray,
+ band_outdb_uri_array: &'a StringArray,
+ band_outdb_format_array: &'a StringViewArray,
band_data_array: &'a BinaryViewArray,
+ raster_index: usize,
+}
+
+impl<'a> RasterRefImpl<'a> {
+ /// Returns the raw CRS string reference with the array's lifetime.
+ pub fn crs_str_ref(&self) -> Option<&'a str> {
+ if self.crs_array.is_null(self.raster_index) {
+ None
+ } else {
+ Some(self.crs_array.value(self.raster_index))
+ }
+ }
}
-impl<'a> BandsRef for BandsRefImpl<'a> {
- fn len(&self) -> usize {
+impl<'a> RasterRef for RasterRefImpl<'a> {
+ fn num_bands(&self) -> usize {
self.bands_list.value_length(self.raster_index) as usize
}
- /// Get a specific band by number (1-based index)
- fn band(&self, number: usize) -> Result<Box<dyn BandRef + '_>, ArrowError>
{
- if number == 0 {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Invalid band number {number}: band numbers must be 1-based"
- )));
- }
- // By convention, band numbers are 1-based.
- // Convert to zero-based index.
- let index = number - 1;
- if index >= self.len() {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Band number {} is out of range: this raster has {} bands",
- number,
- self.len()
- )));
- }
+ fn bands(&self) -> Bands<'_> {
+ Bands::new(self)
+ }
+ fn band(&self, index: usize) -> Option<Box<dyn BandRef + '_>> {
+ if index >= self.num_bands() {
+ return None;
+ }
let start = self.bands_list.value_offsets()[self.raster_index] as
usize;
let band_row = start + index;
- let band_metadata = BandMetadataRefImpl {
- nodata_array: self.nodata_array,
- storage_type_array: self.storage_type_array,
- datatype_array: self.datatype_array,
- outdb_url_array: self.outdb_url_array,
- outdb_band_id_array: self.outdb_band_id_array,
- band_index: band_row,
- };
-
- let band_data = self.band_data_array.value(band_row);
+ // Read source shape slice.
+ let ss_start = self.band_source_shape_list.value_offsets()[band_row]
as usize;
+ let ss_end = self.band_source_shape_list.value_offsets()[band_row + 1]
as usize;
+ let source_shape: &[u64] =
&self.band_source_shape_values.values()[ss_start..ss_end];
- Ok(Box::new(BandRefImpl {
- band_metadata,
- band_data,
- }))
- }
-
- fn iter(&self) -> Box<dyn BandIterator<'_> + '_> {
- Box::new(BandIteratorImpl {
- bands: self,
- current: 1, // Start at 1 for 1-based band numbering
- })
- }
-}
+ // Reject 0-D bands at the read boundary. Schema doesn't forbid them
+ // outright but every consumer assumes ndim >= 1.
+ if source_shape.is_empty() {
+ return None;
+ }
-/// Concrete implementation of BandIterator trait
-pub struct BandIteratorImpl<'a> {
- bands: &'a dyn BandsRef,
- current: usize,
-}
+ // Resolve data type up front; an unknown discriminant is a
+ // schema-corruption bug, not user data, so failing the band is
+ // appropriate.
+ let data_type_value = self.band_datatype_array.value(band_row);
+ let data_type = BandDataType::try_from_u32(data_type_value)?;
+
+ // Only the canonical identity view (null view row) is written today.
+ // A non-null view row would require the view → byte-stride composition
+ // path that is deferred to a follow-up; reject it here so callers see
+ // a clean "no band" rather than a panic.
+ if !self.band_view_list.is_null(band_row) {
+ return None;
+ }
Review Comment:
Should this be `sedona_internal_err!()` so it's harder to forget about?
##########
rust/sedona-raster/src/array.rs:
##########
@@ -15,445 +15,431 @@
// specific language governing permissions and limitations
// under the License.
+use std::borrow::Cow;
+
use arrow_array::{
- Array, BinaryArray, BinaryViewArray, Float64Array, ListArray, StringArray,
StringViewArray,
- StructArray, UInt32Array, UInt64Array,
+ Array, BinaryArray, BinaryViewArray, Float64Array, Int64Array, ListArray,
StringArray,
+ StringViewArray, StructArray, UInt32Array, UInt64Array,
};
use arrow_schema::ArrowError;
-use crate::traits::{
- BandIterator, BandMetadataRef, BandRef, BandsRef, MetadataRef,
RasterMetadata, RasterRef,
-};
-use sedona_schema::raster::{
- band_indices, band_metadata_indices, metadata_indices, raster_indices,
BandDataType,
- StorageType,
-};
+use crate::traits::{BandRef, Bands, NdBuffer, RasterRef, ViewEntry};
+use sedona_schema::raster::{band_indices, raster_indices, BandDataType};
-/// Implement MetadataRef for RasterMetadata to allow direct use with builder
-impl MetadataRef for RasterMetadata {
- fn width(&self) -> u64 {
- self.width
- }
- fn height(&self) -> u64 {
- self.height
- }
- fn upper_left_x(&self) -> f64 {
- self.upperleft_x
- }
- fn upper_left_y(&self) -> f64 {
- self.upperleft_y
- }
- fn scale_x(&self) -> f64 {
- self.scale_x
- }
- fn scale_y(&self) -> f64 {
- self.scale_y
- }
- fn skew_x(&self) -> f64 {
- self.skew_x
- }
- fn skew_y(&self) -> f64 {
- self.skew_y
- }
-}
-
-/// Implementation of MetadataRef for Arrow StructArray
-struct MetadataRefImpl<'a> {
- width_array: &'a UInt64Array,
- height_array: &'a UInt64Array,
- upper_left_x_array: &'a Float64Array,
- upper_left_y_array: &'a Float64Array,
- scale_x_array: &'a Float64Array,
- scale_y_array: &'a Float64Array,
- skew_x_array: &'a Float64Array,
- skew_y_array: &'a Float64Array,
- index: usize,
+/// Arrow-backed implementation of BandRef for a single band within a raster.
+///
+/// Today this handles only the canonical identity view: `view_entries` is
+/// synthesised from `source_shape`, `visible_shape == source_shape`,
+/// and `byte_strides` are plain C-order strides with `byte_offset = 0`.
+struct BandRefImpl<'a> {
+ dim_names_list: &'a ListArray,
+ dim_names_values: &'a StringArray,
+ source_shape_list: &'a ListArray,
+ source_shape_values: &'a UInt64Array,
+ nodata_array: &'a BinaryArray,
+ outdb_uri_array: &'a StringArray,
+ outdb_format_array: &'a StringViewArray,
+ data_array: &'a BinaryViewArray,
+ /// Absolute row index within the flattened bands arrays
+ band_row: usize,
+ /// Resolved at construction so accessors don't re-decode the discriminant.
+ data_type: BandDataType,
+ /// Per-visible-axis view, length = ndim. Always identity today.
+ view_entries: Vec<ViewEntry>,
+ /// Visible shape, length = ndim. Equals `source_shape` today.
+ visible_shape: Vec<u64>,
+ /// Byte strides per visible axis. C-order over `source_shape` today.
+ byte_strides: Vec<i64>,
+ /// Byte offset into `data` of the visible region's `[0,...,0]` element.
+ byte_offset: u64,
}
-impl<'a> MetadataRef for MetadataRefImpl<'a> {
- #[inline(always)]
- fn width(&self) -> u64 {
- self.width_array.value(self.index)
- }
-
- #[inline(always)]
- fn height(&self) -> u64 {
- self.height_array.value(self.index)
+impl<'a> BandRef for BandRefImpl<'a> {
+ fn ndim(&self) -> usize {
+ self.view_entries.len()
}
- #[inline(always)]
- fn upper_left_x(&self) -> f64 {
- self.upper_left_x_array.value(self.index)
+ fn dim_names(&self) -> Vec<&str> {
+ let start = self.dim_names_list.value_offsets()[self.band_row] as
usize;
+ let end = self.dim_names_list.value_offsets()[self.band_row + 1] as
usize;
+ (start..end)
+ .map(|i| self.dim_names_values.value(i))
+ .collect()
}
- #[inline(always)]
- fn upper_left_y(&self) -> f64 {
- self.upper_left_y_array.value(self.index)
+ fn shape(&self) -> &[u64] {
+ &self.visible_shape
}
- #[inline(always)]
- fn scale_x(&self) -> f64 {
- self.scale_x_array.value(self.index)
+ fn raw_source_shape(&self) -> &[u64] {
+ let start = self.source_shape_list.value_offsets()[self.band_row] as
usize;
+ let end = self.source_shape_list.value_offsets()[self.band_row + 1] as
usize;
+ &self.source_shape_values.values()[start..end]
}
- #[inline(always)]
- fn scale_y(&self) -> f64 {
- self.scale_y_array.value(self.index)
+ fn view(&self) -> &[ViewEntry] {
+ &self.view_entries
}
- #[inline(always)]
- fn skew_x(&self) -> f64 {
- self.skew_x_array.value(self.index)
+ fn data_type(&self) -> BandDataType {
+ self.data_type
}
- #[inline(always)]
- fn skew_y(&self) -> f64 {
- self.skew_y_array.value(self.index)
+ fn data(&self) -> &[u8] {
+ // Pre-N-D compatibility surface: returns the raw `data` column bytes
+ // verbatim. For InDb identity-view bands this is the row-major buffer
+ // callers from main expect. For OutDb it's `&[]` — same shape as
+ // main, which let callers see "no in-line bytes" without panicking.
+ self.data_array.value(self.band_row)
}
-}
-
-/// Implementation of BandMetadataRef for Arrow StructArray
-struct BandMetadataRefImpl<'a> {
- nodata_array: &'a BinaryArray,
- storage_type_array: &'a UInt32Array,
- datatype_array: &'a UInt32Array,
- outdb_url_array: &'a StringArray,
- outdb_band_id_array: &'a UInt32Array,
- band_index: usize,
-}
-impl<'a> BandMetadataRef for BandMetadataRefImpl<'a> {
- fn nodata_value(&self) -> Option<&[u8]> {
- if self.nodata_array.is_null(self.band_index) {
+ fn nodata(&self) -> Option<&[u8]> {
+ if self.nodata_array.is_null(self.band_row) {
None
} else {
- Some(self.nodata_array.value(self.band_index))
+ Some(self.nodata_array.value(self.band_row))
}
}
- fn storage_type(&self) -> Result<StorageType, ArrowError> {
- let value = self.storage_type_array.value(self.band_index);
- let storage_type = match value {
- 0 => StorageType::InDb,
- 1 => StorageType::OutDbRef,
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Unknown storage type: {}",
- value
- )))
- }
- };
- Ok(storage_type)
- }
-
- fn data_type(&self) -> Result<BandDataType, ArrowError> {
- let value = self.datatype_array.value(self.band_index);
- let band_data_type = match value {
- 1 => BandDataType::UInt8,
- 2 => BandDataType::UInt16,
- 3 => BandDataType::Int16,
- 4 => BandDataType::UInt32,
- 5 => BandDataType::Int32,
- 6 => BandDataType::Float32,
- 7 => BandDataType::Float64,
- 8 => BandDataType::UInt64,
- 9 => BandDataType::Int64,
- 10 => BandDataType::Int8,
- _ => {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Unknown band data type: {}",
- self.datatype_array.value(self.band_index)
- )))
- }
- };
- Ok(band_data_type)
- }
-
- fn outdb_url(&self) -> Option<&str> {
- if self.outdb_url_array.is_null(self.band_index) {
+ fn outdb_uri(&self) -> Option<&str> {
+ if self.outdb_uri_array.is_null(self.band_row) {
None
} else {
- Some(self.outdb_url_array.value(self.band_index))
+ Some(self.outdb_uri_array.value(self.band_row))
}
}
- fn outdb_band_id(&self) -> Option<u32> {
- if self.outdb_band_id_array.is_null(self.band_index) {
+ fn outdb_format(&self) -> Option<&str> {
+ if self.outdb_format_array.is_null(self.band_row) {
None
} else {
- Some(self.outdb_band_id_array.value(self.band_index))
+ Some(self.outdb_format_array.value(self.band_row))
}
}
-}
-/// Implementation of BandRef for accessing individual band data
-struct BandRefImpl<'a> {
- band_metadata: BandMetadataRefImpl<'a>,
- band_data: &'a [u8],
-}
+ fn is_indb(&self) -> bool {
+ !self.data_array.value(self.band_row).is_empty()
+ }
-impl<'a> BandRef for BandRefImpl<'a> {
- fn metadata(&self) -> &dyn BandMetadataRef {
- &self.band_metadata
+ fn nd_buffer(&self) -> Result<NdBuffer<'_>, ArrowError> {
+ if !self.is_indb() {
+ return Err(ArrowError::NotYetImplemented(
+ "OutDb byte access via nd_buffer() is not yet implemented; \
+ backend-specific OutDb resolvers are tracked separately"
+ .to_string(),
+ ));
+ }
+ // shape and strides are owned by NdBuffer (see its doc comment).
+ // Cloning here is cheap — both vecs are O(ndim), a handful of values.
+ Ok(NdBuffer {
+ buffer: self.data_array.value(self.band_row),
+ shape: self.visible_shape.clone(),
+ strides: self.byte_strides.clone(),
+ offset: self.byte_offset,
+ data_type: self.data_type,
+ })
}
- fn data(&self) -> &[u8] {
- self.band_data
+ fn contiguous_data(&self) -> Result<Cow<'_, [u8]>, ArrowError> {
+ if !self.is_indb() {
+ return Err(ArrowError::NotYetImplemented(
+ "OutDb byte access via contiguous_data() is not yet implemented; \
+ backend-specific OutDb resolvers are tracked separately"
+ .to_string(),
+ ));
+ }
+ // Identity-view only today, so the data buffer is already row-major
+ // over the visible region.
+ Ok(Cow::Borrowed(self.data_array.value(self.band_row)))
}
}
-/// Implementation of BandsRef for accessing all bands in a raster
-struct BandsRefImpl<'a> {
+/// Arrow-backed implementation of RasterRef for a single raster row.
+///
+/// Holds flat references to the underlying Arrow arrays so the impl does
+/// not borrow from a `RasterStructArray` wrapper. That keeps
+/// `RasterStructArray::get(&self, ...)` callable without a `&'a self`
+/// constraint, which would otherwise force callers to hoist the
+/// `RasterStructArray` into a `let` binding.
+pub struct RasterRefImpl<'a> {
+ crs_array: &'a StringViewArray,
+ transform_list: &'a ListArray,
+ transform_values: &'a Float64Array,
+ spatial_dims_list: &'a ListArray,
+ spatial_dims_values: &'a StringViewArray,
+ spatial_shape_list: &'a ListArray,
+ spatial_shape_values: &'a Int64Array,
bands_list: &'a ListArray,
- raster_index: usize,
- // Direct references to the metadata and data arrays
- nodata_array: &'a BinaryArray,
- storage_type_array: &'a UInt32Array,
- datatype_array: &'a UInt32Array,
- outdb_url_array: &'a StringArray,
- outdb_band_id_array: &'a UInt32Array,
+ band_name_array: &'a StringArray,
+ band_dim_names_list: &'a ListArray,
+ band_dim_names_values: &'a StringArray,
+ band_source_shape_list: &'a ListArray,
+ band_source_shape_values: &'a UInt64Array,
+ band_datatype_array: &'a UInt32Array,
+ band_nodata_array: &'a BinaryArray,
+ band_view_list: &'a ListArray,
+ band_outdb_uri_array: &'a StringArray,
+ band_outdb_format_array: &'a StringViewArray,
band_data_array: &'a BinaryViewArray,
+ raster_index: usize,
+}
+
+impl<'a> RasterRefImpl<'a> {
+ /// Returns the raw CRS string reference with the array's lifetime.
+ pub fn crs_str_ref(&self) -> Option<&'a str> {
+ if self.crs_array.is_null(self.raster_index) {
+ None
+ } else {
+ Some(self.crs_array.value(self.raster_index))
+ }
+ }
}
-impl<'a> BandsRef for BandsRefImpl<'a> {
- fn len(&self) -> usize {
+impl<'a> RasterRef for RasterRefImpl<'a> {
+ fn num_bands(&self) -> usize {
self.bands_list.value_length(self.raster_index) as usize
}
- /// Get a specific band by number (1-based index)
- fn band(&self, number: usize) -> Result<Box<dyn BandRef + '_>, ArrowError>
{
- if number == 0 {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Invalid band number {number}: band numbers must be 1-based"
- )));
- }
- // By convention, band numbers are 1-based.
- // Convert to zero-based index.
- let index = number - 1;
- if index >= self.len() {
- return Err(ArrowError::InvalidArgumentError(format!(
- "Band number {} is out of range: this raster has {} bands",
- number,
- self.len()
- )));
- }
+ fn bands(&self) -> Bands<'_> {
+ Bands::new(self)
+ }
+ fn band(&self, index: usize) -> Option<Box<dyn BandRef + '_>> {
+ if index >= self.num_bands() {
+ return None;
+ }
let start = self.bands_list.value_offsets()[self.raster_index] as
usize;
let band_row = start + index;
- let band_metadata = BandMetadataRefImpl {
- nodata_array: self.nodata_array,
- storage_type_array: self.storage_type_array,
- datatype_array: self.datatype_array,
- outdb_url_array: self.outdb_url_array,
- outdb_band_id_array: self.outdb_band_id_array,
- band_index: band_row,
- };
-
- let band_data = self.band_data_array.value(band_row);
+ // Read source shape slice.
+ let ss_start = self.band_source_shape_list.value_offsets()[band_row]
as usize;
+ let ss_end = self.band_source_shape_list.value_offsets()[band_row + 1]
as usize;
+ let source_shape: &[u64] =
&self.band_source_shape_values.values()[ss_start..ss_end];
- Ok(Box::new(BandRefImpl {
- band_metadata,
- band_data,
- }))
- }
-
- fn iter(&self) -> Box<dyn BandIterator<'_> + '_> {
- Box::new(BandIteratorImpl {
- bands: self,
- current: 1, // Start at 1 for 1-based band numbering
- })
- }
-}
+ // Reject 0-D bands at the read boundary. Schema doesn't forbid them
+ // outright but every consumer assumes ndim >= 1.
+ if source_shape.is_empty() {
+ return None;
+ }
-/// Concrete implementation of BandIterator trait
-pub struct BandIteratorImpl<'a> {
- bands: &'a dyn BandsRef,
- current: usize,
-}
+ // Resolve data type up front; an unknown discriminant is a
+ // schema-corruption bug, not user data, so failing the band is
+ // appropriate.
+ let data_type_value = self.band_datatype_array.value(band_row);
+ let data_type = BandDataType::try_from_u32(data_type_value)?;
+
+ // Only the canonical identity view (null view row) is written today.
+ // A non-null view row would require the view → byte-stride composition
+ // path that is deferred to a follow-up; reject it here so callers see
+ // a clean "no band" rather than a panic.
+ if !self.band_view_list.is_null(band_row) {
+ return None;
+ }
+ let view_entries: Vec<ViewEntry> = source_shape
+ .iter()
+ .enumerate()
+ .map(|(i, &s)| ViewEntry {
+ source_axis: i as i64,
+ start: 0,
+ step: 1,
+ steps: s as i64,
+ })
+ .collect();
-impl<'a> Iterator for BandIteratorImpl<'a> {
- type Item = Box<dyn BandRef + 'a>;
+ let visible_shape: Vec<u64> = source_shape.to_vec();
- fn next(&mut self) -> Option<Self::Item> {
- // current is 1-based, compare against len() + 1
- if self.current <= self.bands.len() {
- let band = self.bands.band(self.current).ok(); // Convert Result
to Option
- self.current += 1;
- band
- } else {
- None
+ let dtype_size = data_type.byte_size() as i64;
+ // C-order byte strides over the source_shape:
+ // byte_strides[k] = dtype_size * Π_{j>k} source_shape[j]
Review Comment:
Is this needed?
```suggestion
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org