james-willis commented on code in PR #749:
URL: https://github.com/apache/sedona-db/pull/749#discussion_r3228827570
##########
rust/sedona-raster/src/builder.rs:
##########
@@ -17,325 +17,580 @@
use arrow_array::{
builder::{
- BinaryBuilder, BinaryViewBuilder, BooleanBuilder, Float64Builder,
StringBuilder,
- StringViewBuilder, UInt32Builder, UInt64Builder,
+ ArrayBuilder, BinaryBuilder, BinaryViewBuilder, BooleanBuilder,
Float64Builder,
+ Int64Builder, StringBuilder, StringViewBuilder, UInt32Builder,
UInt64Builder,
},
Array, ArrayRef, ListArray, StructArray,
};
-use arrow_buffer::{OffsetBuffer, ScalarBuffer};
-use arrow_schema::{ArrowError, DataType};
+use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
+use arrow_schema::ArrowError;
use std::sync::Arc;
+use sedona_schema::raster::BandDataType;
use sedona_schema::raster::RasterSchema;
-use crate::traits::{BandMetadata, MetadataRef};
+use arrow_schema::DataType;
-/// Builder for constructing raster arrays with zero-copy band data writing
+/// Builder for constructing N-D raster arrays.
///
-/// Required steps to build a raster:
-/// 1. Create a RasterBuilder with a specified capacity
-/// 2. For each raster to add:
-/// - Call `start_raster` with the appropriate metadata, CRS
-/// - For each band in the raster:
-/// - Call `start_band` with the band metadata
-/// - Use `band_data_writer` to get a BinaryViewBuilder and write the
band data
-/// - Call `finish_band` to complete the band
-/// - Call `finish_raster` to complete the raster
-/// 3. After all rasters are added, call `finish` to get the final StructArray
+/// # Usage
///
-/// Example usage:
/// ```
-/// use sedona_raster::traits::{RasterMetadata, BandMetadata};
-/// use sedona_schema::raster::{StorageType, BandDataType};
/// use sedona_raster::builder::RasterBuilder;
+/// use sedona_schema::raster::BandDataType;
///
/// let mut builder = RasterBuilder::new(1);
-/// let metadata = RasterMetadata {
-/// width: 100, height: 100,
-/// upperleft_x: 0.0, upperleft_y: 0.0,
-/// scale_x: 1.0, scale_y: -1.0,
-/// skew_x: 0.0, skew_y: 0.0,
-/// };
-/// // Start a raster from RasterMetadata struct
-/// builder.start_raster(&metadata, Some("EPSG:4326")).unwrap();
///
-/// // Add a band:
-/// let band_metadata = BandMetadata {
-/// nodata_value: Some(vec![0u8]),
-/// storage_type: StorageType::InDb,
-/// datatype: BandDataType::UInt8,
-/// outdb_url: None,
-/// outdb_band_id: None,
-/// };
-/// builder.start_band(band_metadata).unwrap();
-/// let band_writer = builder.band_data_writer();
-/// band_writer.append_value(&vec![/* band data bytes */]);
-/// builder.finish_band().unwrap();
+/// // 2D raster convenience: sets transform, spatial_dims=["x","y"],
spatial_shape=[w,h]
+/// builder.start_raster_2d(100, 100, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0,
Some("EPSG:4326")).unwrap();
///
-/// // Finish the raster
+/// // 2D band convenience: sets dim_names=["y","x"], shape=[h,w], contiguous
strides
+/// builder.start_band_2d(BandDataType::UInt8, Some(&[0u8])).unwrap();
+/// builder.band_data_writer().append_value(&vec![0u8; 10000]);
+/// builder.finish_band().unwrap();
/// builder.finish_raster().unwrap();
///
-/// // Finish building and get the StructArray
/// let raster_array = builder.finish().unwrap();
/// ```
pub struct RasterBuilder {
- // Metadata fields
- width: UInt64Builder,
- height: UInt64Builder,
- upper_left_x: Float64Builder,
- upper_left_y: Float64Builder,
- scale_x: Float64Builder,
- scale_y: Float64Builder,
- skew_x: Float64Builder,
- skew_y: Float64Builder,
-
- // CRS field
+ // Top-level raster fields
crs: StringViewBuilder,
-
- // Band metadata fields
- band_nodata: BinaryBuilder,
- band_storage_type: UInt32Builder,
+ transform_values: Float64Builder,
+ transform_offsets: Vec<i32>,
+ spatial_dims_values: StringViewBuilder,
+ spatial_dims_offsets: Vec<i32>,
+ spatial_shape_values: Int64Builder,
+ spatial_shape_offsets: Vec<i32>,
+
+ // Band fields (flattened across all bands)
+ band_name: StringBuilder,
+ band_dim_names_values: StringBuilder,
+ band_dim_names_offsets: Vec<i32>,
+ band_shape_values: UInt64Builder,
+ band_shape_offsets: Vec<i32>,
band_datatype: UInt32Builder,
- band_outdb_url: StringBuilder,
- band_outdb_band_id: UInt32Builder,
-
- // Band data field
+ band_nodata: BinaryBuilder,
+ // VIEW field — one entry per visible dimension per band. Stored as four
+ // parallel Int64 columns + a List offset vector; assembled into a
+ // `ListArray<StructArray<Int64,Int64,Int64,Int64>>` in `finish()`.
+ band_view_source_axis_values: Int64Builder,
+ band_view_start_values: Int64Builder,
+ band_view_step_values: Int64Builder,
+ band_view_steps_values: Int64Builder,
+ band_view_offsets: Vec<i32>,
+ // Per-band validity for the view list. `false` means the row is null —
+ // the canonical representation of an identity view. `true` means the row
+ // carries an explicit view in the four parallel value builders.
+ band_view_validity: Vec<bool>,
+ band_outdb_uri: StringBuilder,
+ band_outdb_format: StringViewBuilder,
band_data: BinaryViewBuilder,
// List structure tracking
band_offsets: Vec<i32>, // Track where each raster's bands start/end
current_band_count: i32, // Track bands in current raster
- raster_validity: BooleanBuilder, // Track which rasters are null
+ // Current raster state (needed for start_band_2d)
+ current_width: u64,
+ current_height: u64,
+
+ // Per-raster validation state: spatial dims/shape and recorded bands so
+ // finish_raster can check every band matches the top-level spatial grid.
+ current_spatial_dims: Vec<String>,
+ current_spatial_shape: Vec<i64>,
+ current_raster_bands: Vec<(Vec<String>, Vec<u64>)>,
+
+ // Track band_data count at the start of each band for finish_band
validation
+ band_data_count_at_start: usize,
+
+ raster_validity: BooleanBuilder,
}
impl RasterBuilder {
- /// Create a new raster builder with the specified capacity
+ /// Create a new raster builder with the specified capacity.
pub fn new(capacity: usize) -> Self {
Self {
- // Metadata builders
- width: UInt64Builder::with_capacity(capacity),
- height: UInt64Builder::with_capacity(capacity),
- upper_left_x: Float64Builder::with_capacity(capacity),
- upper_left_y: Float64Builder::with_capacity(capacity),
- scale_x: Float64Builder::with_capacity(capacity),
- scale_y: Float64Builder::with_capacity(capacity),
- skew_x: Float64Builder::with_capacity(capacity),
- skew_y: Float64Builder::with_capacity(capacity),
-
- // CRS builder
crs: StringViewBuilder::with_capacity(capacity),
-
- // Band builders - estimate some bands per raster
- // The capacity is at raster level, but each raster has multiple
bands and
- // are large. We may want to add an optional parameter to control
expected
- // bands per raster or even band size in the future
- band_nodata: BinaryBuilder::with_capacity(capacity, capacity),
- band_storage_type: UInt32Builder::with_capacity(capacity),
+ transform_values: Float64Builder::with_capacity(capacity * 6),
+ transform_offsets: vec![0],
+ spatial_dims_values: StringViewBuilder::with_capacity(capacity *
2),
+ spatial_dims_offsets: vec![0],
+ spatial_shape_values: Int64Builder::with_capacity(capacity * 2),
+ spatial_shape_offsets: vec![0],
+
+ band_name: StringBuilder::with_capacity(capacity, capacity),
+ band_dim_names_values: StringBuilder::with_capacity(capacity * 2,
capacity * 4),
+ band_dim_names_offsets: vec![0],
+ band_shape_values: UInt64Builder::with_capacity(capacity * 2),
+ band_shape_offsets: vec![0],
band_datatype: UInt32Builder::with_capacity(capacity),
- band_outdb_url: StringBuilder::with_capacity(capacity, capacity),
- band_outdb_band_id: UInt32Builder::with_capacity(capacity),
+ band_nodata: BinaryBuilder::with_capacity(capacity, capacity),
+ band_view_source_axis_values: Int64Builder::with_capacity(capacity
* 2),
+ band_view_start_values: Int64Builder::with_capacity(capacity * 2),
+ band_view_step_values: Int64Builder::with_capacity(capacity * 2),
+ band_view_steps_values: Int64Builder::with_capacity(capacity * 2),
+ band_view_offsets: vec![0],
+ band_view_validity: Vec::with_capacity(capacity),
+ band_outdb_uri: StringBuilder::with_capacity(capacity, capacity),
+ band_outdb_format: StringViewBuilder::with_capacity(capacity),
band_data: BinaryViewBuilder::with_capacity(capacity),
- // List tracking
band_offsets: vec![0],
current_band_count: 0,
+ current_width: 0,
+ current_height: 0,
+
+ current_spatial_dims: Vec::new(),
+ current_spatial_shape: Vec::new(),
+ current_raster_bands: Vec::new(),
+
+ band_data_count_at_start: 0,
- // Raster-level validity (keeps track of null rasters)
raster_validity: BooleanBuilder::with_capacity(capacity),
}
}
- /// Start a new raster with metadata and optional CRS
+ /// Start a new raster with explicit N-D parameters.
+ ///
+ /// `transform` must be a 6-element GDAL GeoTransform:
+ /// `[origin_x, scale_x, skew_x, origin_y, skew_y, scale_y]`
+ ///
+ /// `spatial_dims` names the raster-level spatial dimensions (today always
+ /// length 2, e.g. `["x","y"]`). `spatial_shape` gives their sizes in the
+ /// same order. Every band added to this raster must contain each name in
+ /// `spatial_dims` within its own `dim_names`, with matching size.
pub fn start_raster(
&mut self,
- metadata: &dyn MetadataRef,
+ transform: &[f64; 6],
+ spatial_dims: &[&str],
+ spatial_shape: &[i64],
crs: Option<&str>,
) -> Result<(), ArrowError> {
- self.append_metadata_from_ref(metadata)?;
- self.append_crs(crs)?;
+ if spatial_dims.len() != spatial_shape.len() {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "spatial_dims.len() ({}) must equal spatial_shape.len() ({})",
+ spatial_dims.len(),
+ spatial_shape.len()
+ )));
+ }
+
+ // Transform
+ for &v in transform {
+ self.transform_values.append_value(v);
+ }
+ let next = *self.transform_offsets.last().unwrap() + 6;
+ self.transform_offsets.push(next);
+
+ // Spatial dims + shape
+ for d in spatial_dims {
+ self.spatial_dims_values.append_value(d);
+ }
+ let next = *self.spatial_dims_offsets.last().unwrap() +
spatial_dims.len() as i32;
+ self.spatial_dims_offsets.push(next);
+
+ for &s in spatial_shape {
+ self.spatial_shape_values.append_value(s);
+ }
+ let next = *self.spatial_shape_offsets.last().unwrap() +
spatial_shape.len() as i32;
+ self.spatial_shape_offsets.push(next);
+
+ // CRS
+ match crs {
+ Some(crs_data) => self.crs.append_value(crs_data),
+ None => self.crs.append_null(),
+ }
- // Reset band count for this raster
self.current_band_count = 0;
+ self.current_spatial_dims = spatial_dims.iter().map(|s|
s.to_string()).collect();
+ self.current_spatial_shape = spatial_shape.to_vec();
+ self.current_raster_bands.clear();
+ // Preserve legacy current_width/current_height for start_band_2d (set
+ // by start_raster_2d). Callers using this direct entry point drive
+ // their own shapes via start_band.
+ self.current_width = 0;
+ self.current_height = 0;
+
+ Ok(())
+ }
+ /// Convenience: start a 2D raster with the legacy 8-parameter interface.
+ ///
+ /// Sets `spatial_dims=["x","y"]`, `spatial_shape=[width, height]`, and
+ /// builds the 6-element GDAL transform from the individual parameters.
+ #[allow(clippy::too_many_arguments)]
+ pub fn start_raster_2d(
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]