Copilot commented on code in PR #683:
URL: https://github.com/apache/sedona-db/pull/683#discussion_r2908047003
##########
c/sedona-s2geography/src/s2geography.rs:
##########
@@ -34,190 +33,37 @@ pub fn s2_cell_id_from_lnglat(lnglat: (f64, f64)) -> u64 {
unsafe { SedonaGeographyGlueLngLatToCellId(lnglat.0, lnglat.1) }
}
-/// Wrapper for scalar UDFs exposed by s2geography::arrow_udf
-///
-/// Provides a minimal wrapper around the C callables that define
-/// an scalar UDF as exposed by the s2geography library.
-///
-/// These are designed to be sufficiently cheap to initialize that
-/// they can be constructed on the stack, initialized, and executed
-/// for a single batch.
-#[derive(Debug)]
-pub struct S2ScalarUDF {
- inner: SedonaGeographyArrowUdf,
-}
-
-impl Drop for S2ScalarUDF {
- fn drop(&mut self) {
- if let Some(releaser) = self.inner.release {
- unsafe { releaser(&mut self.inner) }
- }
- }
-}
-
-// We have a bunch of these, so we use a macro to declare them. We could
-// use a string or enum to retrieve them as well if this becomes unwieldy.
-macro_rules! define_s2_udfs {
- ($($name:ident),*) => {
- $(
- pub fn $name() -> S2ScalarUDF {
- let mut out = Self::allocate();
- unsafe { paste::paste!([<SedonaGeographyInitUdf $name>])(&mut
out.inner) }
- out
- }
- )*
- }
-}
-
-#[allow(non_snake_case)]
-impl S2ScalarUDF {
- define_s2_udfs![
- Area,
- Centroid,
- ClosestPoint,
- Contains,
- ConvexHull,
- Difference,
- Distance,
- Equals,
- Intersection,
- Intersects,
- Length,
- LineInterpolatePoint,
- LineLocatePoint,
- MaxDistance,
- Perimeter,
- ShortestLine,
- SymDifference,
- Union
- ];
-
- /// Initialize the UDF instance with argument types and options
- ///
- /// This must be called before calling execute().
- pub fn init(
- &mut self,
- arg_types: Fields,
- options: Option<HashMap<String, String>>,
- ) -> Result<FFI_ArrowSchema, S2GeographyError> {
- if options.is_some() {
- // See FFI_ArrowSchema::with_metadata() for implementation. This
- // is to pass options like the radius to use for
length/perimeter/area
- // calculations that are currently hard-coded to the WGS84 mean
radius.
- return Err(S2GeographyError::Internal(
- "scalar UDF options not yet implemented".to_string(),
- ));
- }
-
- let arg_schema = Schema::new(arg_types);
- let mut ffi_arg_schema = FFI_ArrowSchema::try_from(arg_schema)?;
- let ffi_options = [0x00, 0x00, 0x00, 0x00];
- let mut ffi_field_out = FFI_ArrowSchema::empty();
-
- unsafe {
- let ffi_arg_schema: *mut ArrowSchema =
- &mut ffi_arg_schema as *mut FFI_ArrowSchema as *mut
ArrowSchema;
- let ffi_out: *mut ArrowSchema =
- &mut ffi_field_out as *mut FFI_ArrowSchema as *mut ArrowSchema;
- let errc = self.inner.init.unwrap()(
- &mut self.inner,
- ffi_arg_schema,
- ffi_options.as_ptr(),
- ffi_out,
- );
-
- let last_err = self.last_error();
- if errc != 0 && last_err.is_empty() {
- Err(S2GeographyError::Code(errc))
- } else if errc != 0 {
- Err(S2GeographyError::Message(errc, last_err))
- } else {
- Ok(ffi_field_out)
- }
- }
- }
-
- /// Execute a batch
- ///
- /// The resulting FFI_ArrowArray requires the FFI_ArrowSchema returned by
- /// init() to be transformed into an [ArrayRef].
- pub fn execute(&mut self, args: &[ArrayRef]) -> Result<FFI_ArrowArray,
S2GeographyError> {
- let mut args_ffi = args
- .iter()
- .map(|arg| arrow_array::ffi::to_ffi(&arg.to_data()))
- .collect::<Result<Vec<_>, ArrowError>>()?;
- let arg_ptrs = args_ffi
- .iter_mut()
- .map(|arg| &mut arg.0 as *mut FFI_ArrowArray)
- .collect::<Vec<_>>();
- let mut ffi_array_out = FFI_ArrowArray::empty();
-
- unsafe {
- let arg_ptrs: *mut *mut ArrowArray = arg_ptrs.as_ptr() as *mut
*mut ArrowArray;
- let ffi_out: *mut ArrowArray =
- &mut ffi_array_out as *mut FFI_ArrowArray as *mut ArrowArray;
- let errc = self.inner.execute.unwrap()(
- &mut self.inner,
- arg_ptrs,
- args_ffi.len() as i64,
- ffi_out,
- );
-
- let last_err = self.last_error();
- if errc != 0 && last_err.is_empty() {
- Err(S2GeographyError::Code(errc))
- } else if errc != 0 {
- Err(S2GeographyError::Message(errc, last_err))
- } else {
- Ok(ffi_array_out)
- }
- }
- }
-
- fn last_error(&mut self) -> String {
- let c_str = unsafe {
- let raw_c_str = self.inner.get_last_error.unwrap()(&mut
self.inner);
- std::ffi::CStr::from_ptr(raw_c_str)
- };
-
- c_str.to_string_lossy().into_owned()
- }
-
- fn allocate() -> Self {
- Self {
- inner: SedonaGeographyArrowUdf {
- init: None,
- execute: None,
- get_last_error: None,
- release: None,
- private_data: std::ptr::null_mut(),
- },
- }
- }
+pub fn s2_scalar_kernels() -> Result<Vec<(String, ScalarKernelRef)>> {
+ let mut ffi_scalar_kernels = Vec::<SedonaCScalarKernel>::new();
+ ffi_scalar_kernels.resize_with(unsafe { SedonaGeographyGlueNumKernels() },
Default::default);
+
+ let err_code = unsafe {
+ SedonaGeographyGlueInitKernels(
+ ffi_scalar_kernels.as_mut_ptr() as _,
+ size_of::<SedonaCScalarKernel>() * ffi_scalar_kernels.len(),
+ )
+ };
+
+ if err_code != 0 {
+ panic!("SedonaGeographyGlueInitKernels() failed")
+ }
+
+ ffi_scalar_kernels
+ .into_iter()
+ .map(|c_kernel| {
+ let imported_kernel = ImportedScalarKernel::try_from(c_kernel)?;
+ Ok((
+ imported_kernel.function_name().unwrap().to_string(),
+ Arc::new(imported_kernel) as ScalarKernelRef,
+ ))
Review Comment:
`imported_kernel.function_name().unwrap()` can panic if the imported kernel
is missing a function name. Since this is already in a `Result` context, prefer
returning an error when the function name is absent (e.g., `ok_or_else(...)`),
so callers get a structured failure instead of a panic.
##########
c/sedona-s2geography/tests/mod.rs:
##########
@@ -0,0 +1,312 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::ArrayRef;
+use arrow_schema::DataType;
+use datafusion_common::ScalarValue;
+use rstest::rstest;
+use sedona_expr::scalar_udf::SedonaScalarUDF;
+use sedona_s2geography::s2geography::s2_scalar_kernels;
+use sedona_schema::datatypes::{SedonaType, WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY};
+use sedona_testing::{
+ create::{create_array, create_scalar},
+ testers::ScalarUdfTester,
+};
+
+fn s2_udf(name: &str) -> SedonaScalarUDF {
+ for (kernel_name, kernel) in s2_scalar_kernels().unwrap() {
+ if name == kernel_name {
+ return SedonaScalarUDF::from_impl(name, kernel);
+ }
+ }
+
+ panic!("Can't find s2_scalar_udf named '{name}'")
+}
+
+#[rstest]
+fn unary_scalar_kernel(#[values(WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY)]
sedona_type: SedonaType) {
+ let udf = s2_udf("st_length");
+ let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+ assert_eq!(
+ tester.return_type().unwrap(),
+ SedonaType::Arrow(DataType::Float64)
+ );
+
+ // Array -> Array
+ let result = tester
+ .invoke_wkb_array(vec![
+ Some("POINT (0 1)"),
+ Some("LINESTRING (0 0, 0 1)"),
+ Some("POLYGON ((0 0, 1 0, 0 1, 0 0))"),
+ None,
+ ])
+ .unwrap();
+
+ let expected: ArrayRef = arrow_array::create_array!(
+ Float64,
+ [Some(0.0), Some(111195.10117748393), Some(0.0), None]
+ );
+
+ assert_eq!(&result, &expected);
+
+ // Scalar -> Scalar
+ let result = tester
+ .invoke_wkb_scalar(Some("LINESTRING (0 0, 0 1)"))
+ .unwrap();
+ assert_eq!(result, ScalarValue::Float64(Some(111195.10117748393)));
Review Comment:
These tests compare floating-point outputs using exact equality (e.g.,
`111195.10117748393`). That can be brittle across platforms/compiler
flags/library versions. Prefer approximate comparisons with a tolerance (or use
a helper in `sedona_testing` if available) for Float64 results to reduce flaky
CI failures.
##########
c/sedona-s2geography/src/geography_glue_bindgen.rs:
##########
@@ -17,78 +17,15 @@
use std::os::raw::{c_char, c_int, c_void};
-#[repr(C)]
-pub struct ArrowSchema {
- _private: [u8; 0],
-}
-
-#[repr(C)]
-pub struct ArrowArray {
- _private: [u8; 0],
-}
-
-#[repr(C)]
-#[derive(Debug, Copy, Clone)]
-pub struct SedonaGeographyArrowUdf {
- pub init: Option<
- unsafe extern "C" fn(
- self_: *mut SedonaGeographyArrowUdf,
- arg_schema: *mut ArrowSchema,
- options: *const c_char,
- out: *mut ArrowSchema,
- ) -> c_int,
- >,
- pub execute: Option<
- unsafe extern "C" fn(
- self_: *mut SedonaGeographyArrowUdf,
- args: *mut *mut ArrowArray,
- n_args: i64,
- out: *mut ArrowArray,
- ) -> c_int,
- >,
- pub get_last_error:
- Option<unsafe extern "C" fn(self_: *mut SedonaGeographyArrowUdf) ->
*const c_char>,
- pub release: Option<unsafe extern "C" fn(self_: *mut
SedonaGeographyArrowUdf)>,
- pub private_data: *mut c_void,
-}
-
-macro_rules! declare_s2_c_udfs {
- ($($name:ident),*) => {
- $(
- paste::item! {
- pub fn [<SedonaGeographyInitUdf $name>](out: *mut
SedonaGeographyArrowUdf);
- }
- )*
- }
-}
-
unsafe extern "C" {
- pub fn SedonaGeographyGlueNanoarrowVersion() -> *const c_char;
- pub fn SedonaGeographyGlueGeoArrowVersion() -> *const c_char;
pub fn SedonaGeographyGlueOpenSSLVersion() -> *const c_char;
pub fn SedonaGeographyGlueS2GeometryVersion() -> *const c_char;
pub fn SedonaGeographyGlueAbseilVersion() -> *const c_char;
pub fn SedonaGeographyGlueTestLinkage() -> f64;
pub fn SedonaGeographyGlueLngLatToCellId(lng: f64, lat: f64) -> u64;
-
- declare_s2_c_udfs!(
- Area,
- Centroid,
- ClosestPoint,
- Contains,
- ConvexHull,
- Difference,
- Distance,
- Equals,
- Intersection,
- Intersects,
- Length,
- LineInterpolatePoint,
- LineLocatePoint,
- MaxDistance,
- Perimeter,
- ShortestLine,
- SymDifference,
- Union
- );
+ pub fn SedonaGeographyGlueNumKernels() -> usize;
+ pub fn SedonaGeographyGlueInitKernels(
+ kernels_array: *mut c_void,
+ kerenels_size_bytes: usize,
Review Comment:
Typo in parameter name: `kerenels_size_bytes` should be `kernels_size_bytes`
for clarity/consistency with the C header.
```suggestion
kernels_size_bytes: usize,
```
##########
c/sedona-s2geography/src/s2geography.rs:
##########
@@ -34,190 +33,37 @@ pub fn s2_cell_id_from_lnglat(lnglat: (f64, f64)) -> u64 {
unsafe { SedonaGeographyGlueLngLatToCellId(lnglat.0, lnglat.1) }
}
-/// Wrapper for scalar UDFs exposed by s2geography::arrow_udf
-///
-/// Provides a minimal wrapper around the C callables that define
-/// an scalar UDF as exposed by the s2geography library.
-///
-/// These are designed to be sufficiently cheap to initialize that
-/// they can be constructed on the stack, initialized, and executed
-/// for a single batch.
-#[derive(Debug)]
-pub struct S2ScalarUDF {
- inner: SedonaGeographyArrowUdf,
-}
-
-impl Drop for S2ScalarUDF {
- fn drop(&mut self) {
- if let Some(releaser) = self.inner.release {
- unsafe { releaser(&mut self.inner) }
- }
- }
-}
-
-// We have a bunch of these, so we use a macro to declare them. We could
-// use a string or enum to retrieve them as well if this becomes unwieldy.
-macro_rules! define_s2_udfs {
- ($($name:ident),*) => {
- $(
- pub fn $name() -> S2ScalarUDF {
- let mut out = Self::allocate();
- unsafe { paste::paste!([<SedonaGeographyInitUdf $name>])(&mut
out.inner) }
- out
- }
- )*
- }
-}
-
-#[allow(non_snake_case)]
-impl S2ScalarUDF {
- define_s2_udfs![
- Area,
- Centroid,
- ClosestPoint,
- Contains,
- ConvexHull,
- Difference,
- Distance,
- Equals,
- Intersection,
- Intersects,
- Length,
- LineInterpolatePoint,
- LineLocatePoint,
- MaxDistance,
- Perimeter,
- ShortestLine,
- SymDifference,
- Union
- ];
-
- /// Initialize the UDF instance with argument types and options
- ///
- /// This must be called before calling execute().
- pub fn init(
- &mut self,
- arg_types: Fields,
- options: Option<HashMap<String, String>>,
- ) -> Result<FFI_ArrowSchema, S2GeographyError> {
- if options.is_some() {
- // See FFI_ArrowSchema::with_metadata() for implementation. This
- // is to pass options like the radius to use for
length/perimeter/area
- // calculations that are currently hard-coded to the WGS84 mean
radius.
- return Err(S2GeographyError::Internal(
- "scalar UDF options not yet implemented".to_string(),
- ));
- }
-
- let arg_schema = Schema::new(arg_types);
- let mut ffi_arg_schema = FFI_ArrowSchema::try_from(arg_schema)?;
- let ffi_options = [0x00, 0x00, 0x00, 0x00];
- let mut ffi_field_out = FFI_ArrowSchema::empty();
-
- unsafe {
- let ffi_arg_schema: *mut ArrowSchema =
- &mut ffi_arg_schema as *mut FFI_ArrowSchema as *mut
ArrowSchema;
- let ffi_out: *mut ArrowSchema =
- &mut ffi_field_out as *mut FFI_ArrowSchema as *mut ArrowSchema;
- let errc = self.inner.init.unwrap()(
- &mut self.inner,
- ffi_arg_schema,
- ffi_options.as_ptr(),
- ffi_out,
- );
-
- let last_err = self.last_error();
- if errc != 0 && last_err.is_empty() {
- Err(S2GeographyError::Code(errc))
- } else if errc != 0 {
- Err(S2GeographyError::Message(errc, last_err))
- } else {
- Ok(ffi_field_out)
- }
- }
- }
-
- /// Execute a batch
- ///
- /// The resulting FFI_ArrowArray requires the FFI_ArrowSchema returned by
- /// init() to be transformed into an [ArrayRef].
- pub fn execute(&mut self, args: &[ArrayRef]) -> Result<FFI_ArrowArray,
S2GeographyError> {
- let mut args_ffi = args
- .iter()
- .map(|arg| arrow_array::ffi::to_ffi(&arg.to_data()))
- .collect::<Result<Vec<_>, ArrowError>>()?;
- let arg_ptrs = args_ffi
- .iter_mut()
- .map(|arg| &mut arg.0 as *mut FFI_ArrowArray)
- .collect::<Vec<_>>();
- let mut ffi_array_out = FFI_ArrowArray::empty();
-
- unsafe {
- let arg_ptrs: *mut *mut ArrowArray = arg_ptrs.as_ptr() as *mut
*mut ArrowArray;
- let ffi_out: *mut ArrowArray =
- &mut ffi_array_out as *mut FFI_ArrowArray as *mut ArrowArray;
- let errc = self.inner.execute.unwrap()(
- &mut self.inner,
- arg_ptrs,
- args_ffi.len() as i64,
- ffi_out,
- );
-
- let last_err = self.last_error();
- if errc != 0 && last_err.is_empty() {
- Err(S2GeographyError::Code(errc))
- } else if errc != 0 {
- Err(S2GeographyError::Message(errc, last_err))
- } else {
- Ok(ffi_array_out)
- }
- }
- }
-
- fn last_error(&mut self) -> String {
- let c_str = unsafe {
- let raw_c_str = self.inner.get_last_error.unwrap()(&mut
self.inner);
- std::ffi::CStr::from_ptr(raw_c_str)
- };
-
- c_str.to_string_lossy().into_owned()
- }
-
- fn allocate() -> Self {
- Self {
- inner: SedonaGeographyArrowUdf {
- init: None,
- execute: None,
- get_last_error: None,
- release: None,
- private_data: std::ptr::null_mut(),
- },
- }
- }
+pub fn s2_scalar_kernels() -> Result<Vec<(String, ScalarKernelRef)>> {
+ let mut ffi_scalar_kernels = Vec::<SedonaCScalarKernel>::new();
+ ffi_scalar_kernels.resize_with(unsafe { SedonaGeographyGlueNumKernels() },
Default::default);
+
+ let err_code = unsafe {
+ SedonaGeographyGlueInitKernels(
+ ffi_scalar_kernels.as_mut_ptr() as _,
+ size_of::<SedonaCScalarKernel>() * ffi_scalar_kernels.len(),
+ )
Review Comment:
`size_of` is not in scope here (unless imported elsewhere in this module),
which will fail to compile. Use `std::mem::size_of::<SedonaCScalarKernel>()` or
add `use std::mem::size_of;`.
##########
c/sedona-s2geography/src/geography_glue.cc:
##########
@@ -77,58 +71,34 @@ uint64_t SedonaGeographyGlueLngLatToCellId(double lng,
double lat) {
}
}
-struct UdfExporter {
- static void Export(std::unique_ptr<s2geography::arrow_udf::ArrowUDF> udf,
- struct SedonaGeographyArrowUdf* out) {
- out->private_data = udf.release();
- out->init = &CInit;
- out->execute = &CExecute;
- out->get_last_error = &CLastError;
- out->release = &CRelease;
- }
-
- static int CInit(struct SedonaGeographyArrowUdf* self, struct ArrowSchema*
arg_schema,
- const char* options, struct ArrowSchema* out) {
- auto udf =
reinterpret_cast<s2geography::arrow_udf::ArrowUDF*>(self->private_data);
- return udf->Init(arg_schema, options, out);
- }
- static int CExecute(struct SedonaGeographyArrowUdf* self, struct
ArrowArray** args,
- int64_t n_args, struct ArrowArray* out) {
- auto udf =
reinterpret_cast<s2geography::arrow_udf::ArrowUDF*>(self->private_data);
- return udf->Execute(args, n_args, out);
- }
- static const char* CLastError(struct SedonaGeographyArrowUdf* self) {
- auto udf =
reinterpret_cast<s2geography::arrow_udf::ArrowUDF*>(self->private_data);
- return udf->GetLastError();
- }
-
- static void CRelease(struct SedonaGeographyArrowUdf* self) {
- auto udf =
reinterpret_cast<s2geography::arrow_udf::ArrowUDF*>(self->private_data);
- delete udf;
- self->private_data = nullptr;
- }
-};
+size_t SedonaGeographyGlueNumKernels(void) { return 18; }
-#define INIT_UDF_IMPL(name) \
- void SedonaGeographyInitUdf##name(struct SedonaGeographyArrowUdf* out) { \
- return UdfExporter::Export(s2geography::arrow_udf::name(), out); \
+int SedonaGeographyGlueInitKernels(void* kernels_array, size_t
kerenels_size_bytes) {
+ if (kerenels_size_bytes !=
+ (sizeof(SedonaCScalarKernel) * SedonaGeographyGlueNumKernels())) {
+ return EINVAL;
}
Review Comment:
Two issues here: (1) `SedonaGeographyGlueNumKernels()` is hard-coded to 18,
which can drift from the actual exported list over time; consider deriving it
from a single source of truth (e.g., a constexpr array of initializers and
`std::size(...)`). (2) `EINVAL` requires the appropriate errno header
(`<cerrno>` or `<errno.h>`); it’s not included in the shown includes, so this
may fail to compile depending on transitive includes.
##########
c/sedona-s2geography/src/s2geography.rs:
##########
@@ -34,190 +33,37 @@ pub fn s2_cell_id_from_lnglat(lnglat: (f64, f64)) -> u64 {
unsafe { SedonaGeographyGlueLngLatToCellId(lnglat.0, lnglat.1) }
}
-/// Wrapper for scalar UDFs exposed by s2geography::arrow_udf
-///
-/// Provides a minimal wrapper around the C callables that define
-/// an scalar UDF as exposed by the s2geography library.
-///
-/// These are designed to be sufficiently cheap to initialize that
-/// they can be constructed on the stack, initialized, and executed
-/// for a single batch.
-#[derive(Debug)]
-pub struct S2ScalarUDF {
- inner: SedonaGeographyArrowUdf,
-}
-
-impl Drop for S2ScalarUDF {
- fn drop(&mut self) {
- if let Some(releaser) = self.inner.release {
- unsafe { releaser(&mut self.inner) }
- }
- }
-}
-
-// We have a bunch of these, so we use a macro to declare them. We could
-// use a string or enum to retrieve them as well if this becomes unwieldy.
-macro_rules! define_s2_udfs {
- ($($name:ident),*) => {
- $(
- pub fn $name() -> S2ScalarUDF {
- let mut out = Self::allocate();
- unsafe { paste::paste!([<SedonaGeographyInitUdf $name>])(&mut
out.inner) }
- out
- }
- )*
- }
-}
-
-#[allow(non_snake_case)]
-impl S2ScalarUDF {
- define_s2_udfs![
- Area,
- Centroid,
- ClosestPoint,
- Contains,
- ConvexHull,
- Difference,
- Distance,
- Equals,
- Intersection,
- Intersects,
- Length,
- LineInterpolatePoint,
- LineLocatePoint,
- MaxDistance,
- Perimeter,
- ShortestLine,
- SymDifference,
- Union
- ];
-
- /// Initialize the UDF instance with argument types and options
- ///
- /// This must be called before calling execute().
- pub fn init(
- &mut self,
- arg_types: Fields,
- options: Option<HashMap<String, String>>,
- ) -> Result<FFI_ArrowSchema, S2GeographyError> {
- if options.is_some() {
- // See FFI_ArrowSchema::with_metadata() for implementation. This
- // is to pass options like the radius to use for
length/perimeter/area
- // calculations that are currently hard-coded to the WGS84 mean
radius.
- return Err(S2GeographyError::Internal(
- "scalar UDF options not yet implemented".to_string(),
- ));
- }
-
- let arg_schema = Schema::new(arg_types);
- let mut ffi_arg_schema = FFI_ArrowSchema::try_from(arg_schema)?;
- let ffi_options = [0x00, 0x00, 0x00, 0x00];
- let mut ffi_field_out = FFI_ArrowSchema::empty();
-
- unsafe {
- let ffi_arg_schema: *mut ArrowSchema =
- &mut ffi_arg_schema as *mut FFI_ArrowSchema as *mut
ArrowSchema;
- let ffi_out: *mut ArrowSchema =
- &mut ffi_field_out as *mut FFI_ArrowSchema as *mut ArrowSchema;
- let errc = self.inner.init.unwrap()(
- &mut self.inner,
- ffi_arg_schema,
- ffi_options.as_ptr(),
- ffi_out,
- );
-
- let last_err = self.last_error();
- if errc != 0 && last_err.is_empty() {
- Err(S2GeographyError::Code(errc))
- } else if errc != 0 {
- Err(S2GeographyError::Message(errc, last_err))
- } else {
- Ok(ffi_field_out)
- }
- }
- }
-
- /// Execute a batch
- ///
- /// The resulting FFI_ArrowArray requires the FFI_ArrowSchema returned by
- /// init() to be transformed into an [ArrayRef].
- pub fn execute(&mut self, args: &[ArrayRef]) -> Result<FFI_ArrowArray,
S2GeographyError> {
- let mut args_ffi = args
- .iter()
- .map(|arg| arrow_array::ffi::to_ffi(&arg.to_data()))
- .collect::<Result<Vec<_>, ArrowError>>()?;
- let arg_ptrs = args_ffi
- .iter_mut()
- .map(|arg| &mut arg.0 as *mut FFI_ArrowArray)
- .collect::<Vec<_>>();
- let mut ffi_array_out = FFI_ArrowArray::empty();
-
- unsafe {
- let arg_ptrs: *mut *mut ArrowArray = arg_ptrs.as_ptr() as *mut
*mut ArrowArray;
- let ffi_out: *mut ArrowArray =
- &mut ffi_array_out as *mut FFI_ArrowArray as *mut ArrowArray;
- let errc = self.inner.execute.unwrap()(
- &mut self.inner,
- arg_ptrs,
- args_ffi.len() as i64,
- ffi_out,
- );
-
- let last_err = self.last_error();
- if errc != 0 && last_err.is_empty() {
- Err(S2GeographyError::Code(errc))
- } else if errc != 0 {
- Err(S2GeographyError::Message(errc, last_err))
- } else {
- Ok(ffi_array_out)
- }
- }
- }
-
- fn last_error(&mut self) -> String {
- let c_str = unsafe {
- let raw_c_str = self.inner.get_last_error.unwrap()(&mut
self.inner);
- std::ffi::CStr::from_ptr(raw_c_str)
- };
-
- c_str.to_string_lossy().into_owned()
- }
-
- fn allocate() -> Self {
- Self {
- inner: SedonaGeographyArrowUdf {
- init: None,
- execute: None,
- get_last_error: None,
- release: None,
- private_data: std::ptr::null_mut(),
- },
- }
- }
+pub fn s2_scalar_kernels() -> Result<Vec<(String, ScalarKernelRef)>> {
+ let mut ffi_scalar_kernels = Vec::<SedonaCScalarKernel>::new();
+ ffi_scalar_kernels.resize_with(unsafe { SedonaGeographyGlueNumKernels() },
Default::default);
+
+ let err_code = unsafe {
+ SedonaGeographyGlueInitKernels(
+ ffi_scalar_kernels.as_mut_ptr() as _,
+ size_of::<SedonaCScalarKernel>() * ffi_scalar_kernels.len(),
+ )
+ };
+
+ if err_code != 0 {
+ panic!("SedonaGeographyGlueInitKernels() failed")
+ }
Review Comment:
This function returns `Result<...>` but panics on kernel init failure.
Prefer converting the non-zero `err_code` into an error return (e.g.,
`datafusion_common::DataFusionError::External` / internal error via your
crate’s error conventions), ideally including the error code for diagnosability.
##########
c/sedona-s2geography/src/geography_glue.h:
##########
@@ -45,40 +46,9 @@ double SedonaGeographyGlueTestLinkage(void);
uint64_t SedonaGeographyGlueLngLatToCellId(double lng, double lat);
-struct SedonaGeographyArrowUdf {
- int (*init)(struct SedonaGeographyArrowUdf* self, struct ArrowSchema*
arg_schema,
- const char* options, struct ArrowSchema* out);
- int (*execute)(struct SedonaGeographyArrowUdf* self, struct ArrowArray**
args,
- int64_t n_args, struct ArrowArray* out);
- const char* (*get_last_error)(struct SedonaGeographyArrowUdf* self);
- void (*release)(struct SedonaGeographyArrowUdf* self);
+size_t SedonaGeographyGlueNumKernels(void);
- void* private_data;
-};
-
-#define DECLARE_UDF_IMPL(name) \
- void SedonaGeographyInitUdf##name(struct SedonaGeographyArrowUdf* out)
-
-DECLARE_UDF_IMPL(Area);
-DECLARE_UDF_IMPL(Centroid);
-DECLARE_UDF_IMPL(ClosestPoint);
-DECLARE_UDF_IMPL(Contains);
-DECLARE_UDF_IMPL(ConvexHull);
-DECLARE_UDF_IMPL(Difference);
-DECLARE_UDF_IMPL(Distance);
-DECLARE_UDF_IMPL(Equals);
-DECLARE_UDF_IMPL(Intersection);
-DECLARE_UDF_IMPL(Intersects);
-DECLARE_UDF_IMPL(Length);
-DECLARE_UDF_IMPL(LineInterpolatePoint);
-DECLARE_UDF_IMPL(LineLocatePoint);
-DECLARE_UDF_IMPL(MaxDistance);
-DECLARE_UDF_IMPL(Perimeter);
-DECLARE_UDF_IMPL(ShortestLine);
-DECLARE_UDF_IMPL(SymDifference);
-DECLARE_UDF_IMPL(Union);
-
-#undef DECLARE_UDF_IMPL
+int SedonaGeographyGlueInitKernels(void* kernels_array, size_t
kerenels_size_bytes);
Review Comment:
Typo in parameter name: `kerenels_size_bytes` should be `kernels_size_bytes`
for clarity/consistency with the Rust declaration.
```suggestion
int SedonaGeographyGlueInitKernels(void* kernels_array, size_t
kernels_size_bytes);
```
##########
c/sedona-s2geography/src/register.rs:
##########
@@ -14,35 +14,20 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+
+use datafusion_common::Result;
+use sedona_common::sedona_internal_err;
use sedona_expr::scalar_udf::ScalarKernelRef;
+use std::sync::OnceLock;
-use crate::scalar_kernel;
+static S2_SCALAR_KERNELS: OnceLock<Result<Vec<(String, ScalarKernelRef)>>> =
OnceLock::new();
-pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
- vec![
- ("st_area", scalar_kernel::st_area_impl()),
- ("st_centroid", scalar_kernel::st_centroid_impl()),
- ("st_closestpoint", scalar_kernel::st_closest_point_impl()),
- ("st_contains", scalar_kernel::st_contains_impl()),
- ("st_convexhull", scalar_kernel::st_convex_hull_impl()),
- ("st_difference", scalar_kernel::st_difference_impl()),
- ("st_distance", scalar_kernel::st_distance_impl()),
- ("st_equals", scalar_kernel::st_equals_impl()),
- ("st_intersection", scalar_kernel::st_intersection_impl()),
- ("st_intersects", scalar_kernel::st_intersects_impl()),
- (
- "st_lineinterpolatepoint",
- scalar_kernel::st_line_interpolate_point_impl(),
- ),
- (
- "st_linelocatepoint",
- scalar_kernel::st_line_locate_point_impl(),
- ),
- ("st_length", scalar_kernel::st_length_impl()),
- ("st_symdifference", scalar_kernel::st_sym_difference_impl()),
- ("st_maxdistance", scalar_kernel::st_max_distance_impl()),
- ("st_perimeter", scalar_kernel::st_perimeter_impl()),
- ("st_shortestline", scalar_kernel::st_shortest_line_impl()),
- ("st_union", scalar_kernel::st_union_impl()),
- ]
+pub fn scalar_kernels() -> Result<Vec<(&'static str, ScalarKernelRef)>> {
+ match S2_SCALAR_KERNELS.get_or_init(crate::s2geography::s2_scalar_kernels)
{
+ Ok(kernels) => Ok(kernels
+ .iter()
+ .map(|(name, kernel)| (name.as_str(), kernel.clone()))
+ .collect()),
+ Err(err) => sedona_internal_err!("Error initializing s2geography
kernels: {err}"),
+ }
}
Review Comment:
This API cannot safely return `&'static str`: `name.as_str()` is borrowed
from the `String` stored in the `OnceLock`, not a true `'static` string. This
is a compile-time lifetime mismatch (or would be unsound if forced). Return
owned names instead (e.g., `Result<Vec<(String, ScalarKernelRef)>>`), or use an
owned shared string type (e.g., `Arc<str>`) and update call sites to pass
`&name` when needed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]