Copilot commented on code in PR #831:
URL: https://github.com/apache/sedona-db/pull/831#discussion_r3228348440


##########
rust/sedona-raster-gdal/src/rs_frompath.rs:
##########
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! RS_FromPath UDF - Load out-db raster from file path.
+
+use std::sync::Arc;
+
+use arrow_array::Array;
+use arrow_schema::DataType;
+use datafusion_common::cast::as_string_array;
+use datafusion_common::config::ConfigOptions;
+use datafusion_common::error::Result;
+use datafusion_expr::{ColumnarValue, Volatility};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_raster::builder::RasterBuilder;
+use sedona_schema::datatypes::{SedonaType, RASTER};
+use sedona_schema::matchers::ArgMatcher;
+
+use crate::gdal_common::with_gdal;
+use crate::gdal_dataset_provider::configure_thread_local_options;
+use crate::utils::append_as_outdb_raster;
+
+pub fn rs_from_path_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "rs_frompath",
+        vec![Arc::new(RsFromPath)],
+        Volatility::Volatile,
+    )
+}

Review Comment:
   The PR title/docs refer to `RS_FromPath`, but the registered UDF name is 
`rs_frompath` (missing underscore and different casing). This is user-visible 
API: please align the registered function name with the documented SQL function 
name (or update the docs/bench accordingly) to avoid confusion and broken 
queries.



##########
rust/sedona-raster-gdal/src/utils.rs:
##########
@@ -109,6 +104,68 @@ pub fn append_as_indb_raster(dataset: &Dataset, builder: 
&mut RasterBuilder) ->
     Ok(())
 }
 
+/// Append a raster source path as a single out-db raster to the provided 
[`RasterBuilder`].
+pub fn append_as_outdb_raster(gdal: &Gdal, path: &str, builder: &mut 
RasterBuilder) -> Result<()> {
+    let gdal_path = normalize_outdb_source_path(path);
+    let dataset = gdal
+        .open_ex_with_options(
+            &gdal_path,
+            DatasetOptions {
+                open_flags: GDAL_OF_RASTER | GDAL_OF_READONLY,
+                ..Default::default()
+            },
+        )
+        .map_err(|e| {
+            exec_datafusion_err!(
+                "Failed to open raster file '{}'(GDAL path '{}'): {}",
+                path,
+                gdal_path,
+                e
+            )
+        })?;

Review Comment:
   The error message is missing a space before `(GDAL path ...)`, which reduces 
readability. Consider changing it to `\"Failed to open raster file '{}' (GDAL 
path '{}'): {}\"`.



##########
rust/sedona/src/context.rs:
##########
@@ -221,6 +221,10 @@ impl SedonaContext {
             Arc::new(RandomGeometryFunction::default()),
         );
 
+        for udf in sedona_raster_gdal::all_gdal_udfs() {
+            out.ctx.register_udf(udf.into());
+        }

Review Comment:
   Registering the path-based GDAL UDFs by default makes it possible for SQL 
users to trigger reads from arbitrary local paths and/or remote URLs (e.g., via 
`/vsicurl/` mapping). If Sedona can run untrusted queries, this is a security 
boundary concern (local file disclosure / SSRF). Consider gating registration 
behind an explicit opt-in setting/feature flag, or adding a configuration check 
(allowlist/denylist of schemes) before enabling these UDFs.



##########
rust/sedona-raster-gdal/src/gdal_common.rs:
##########
@@ -313,6 +288,54 @@ pub fn nodata_bytes_to_f64(nodata_bytes: Option<&[u8]>, 
band_type: &BandDataType
     bytes_to_f64(bytes, band_type).ok()
 }
 
+/// Read a GDAL band's nodata value into a byte vector using the band's native 
type.
+pub fn band_nodata_to_bytes(band: &RasterBand<'_>) -> Result<Option<Vec<u8>>> {
+    let band_type = gdal_to_band_data_type(band.band_type())?;
+
+    Ok(match band_type {
+        BandDataType::UInt64 => band
+            .no_data_value_u64()
+            .map(|nodata| nodata.to_le_bytes().to_vec()),
+        BandDataType::Int64 => band
+            .no_data_value_i64()
+            .map(|nodata| nodata.to_le_bytes().to_vec()),
+        _ => band
+            .no_data_value()
+            .map(|nodata| nodata_f64_to_bytes(nodata, &band_type)),
+    })
+}

Review Comment:
   `band_nodata_to_bytes` recomputes `BandDataType` from the band, but callers 
in `utils.rs` already compute `band_data_type` for metadata/error reporting. To 
avoid duplicate conversions per band, consider adding an overload like 
`band_nodata_to_bytes_typed(band, &BandDataType)` (or passing the computed type 
in) so the band type is derived once per band.



##########
rust/sedona-raster-gdal/src/rs_frompath.rs:
##########
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! RS_FromPath UDF - Load out-db raster from file path.
+
+use std::sync::Arc;
+
+use arrow_array::Array;
+use arrow_schema::DataType;
+use datafusion_common::cast::as_string_array;
+use datafusion_common::config::ConfigOptions;
+use datafusion_common::error::Result;
+use datafusion_expr::{ColumnarValue, Volatility};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_raster::builder::RasterBuilder;
+use sedona_schema::datatypes::{SedonaType, RASTER};
+use sedona_schema::matchers::ArgMatcher;
+
+use crate::gdal_common::with_gdal;
+use crate::gdal_dataset_provider::configure_thread_local_options;
+use crate::utils::append_as_outdb_raster;
+
+pub fn rs_from_path_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "rs_frompath",
+        vec![Arc::new(RsFromPath)],
+        Volatility::Volatile,
+    )
+}
+
+#[derive(Debug)]
+pub(crate) struct RsFromPath;
+
+impl SedonaScalarKernel for RsFromPath {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        ArgMatcher::new(vec![ArgMatcher::is_string()], RASTER).match_args(args)
+    }
+
+    fn invoke_batch_from_args(
+        &self,
+        _arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+        _return_type: &SedonaType,
+        _num_rows: usize,
+        config_options: Option<&ConfigOptions>,
+    ) -> Result<ColumnarValue> {
+        with_gdal(|gdal| {
+            configure_thread_local_options(gdal, config_options)?;
+
+            let paths = args[0].cast_to(&DataType::Utf8, None)?.into_array(1)?;
+            let path_array = as_string_array(&paths)?;
+
+            let len = path_array.len();
+            let mut builder = RasterBuilder::new(len);
+            for i in 0..len {
+                if path_array.is_null(i) {
+                    builder.append_null()?;
+                } else {
+                    let path = path_array.value(i);
+                    append_as_outdb_raster(gdal, path, &mut builder)?;
+                }
+            }

Review Comment:
   New behavior here includes (1) null propagation (input path NULL => output 
raster NULL) and (2) error behavior for invalid/unopenable paths. The current 
tests cover scalar/array/empty, but don’t assert null handling or the error 
message/variant for a bad path. Please add unit tests for an input array 
containing NULL and for an invalid path to lock in expected semantics.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to