gstvg commented on code in PR #21679:
URL: https://github.com/apache/datafusion/pull/21679#discussion_r3151965424


##########
datafusion/expr/src/higher_order_function.rs:
##########
@@ -0,0 +1,771 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`HigherOrderUDF`]: User Defined Higher Order Functions
+
+use crate::expr::schema_name_from_exprs_comma_separated_without_space;
+use crate::{ColumnarValue, Documentation, Expr};
+use arrow::array::{ArrayRef, RecordBatch};
+use arrow::datatypes::{DataType, FieldRef, Schema};
+use datafusion_common::config::ConfigOptions;
+use datafusion_common::{Result, ScalarValue, exec_err, not_impl_err};
+use datafusion_expr_common::dyn_eq::{DynEq, DynHash};
+use datafusion_expr_common::signature::Volatility;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+use std::any::Any;
+use std::cmp::Ordering;
+use std::fmt::Debug;
+use std::hash::{Hash, Hasher};
+use std::sync::Arc;
+
+/// The types of arguments for which a function has implementations.
+///
+/// [`HigherOrderTypeSignature`] **DOES NOT** define the types that a user 
query could call the
+/// function with. DataFusion will automatically coerce (cast) argument types 
to
+/// one of the supported function signatures, if possible.
+///
+/// # Overview
+/// Functions typically provide implementations for a small number of different
+/// argument [`DataType`]s, rather than all possible combinations. If a user
+/// calls a function with arguments that do not match any of the declared 
types,
+/// DataFusion will attempt to automatically coerce (add casts to) function
+/// arguments so they match the [`HigherOrderTypeSignature`]. See the 
[`type_coercion`] module
+/// for more details
+///
+/// [`type_coercion`]: crate::type_coercion
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
+pub enum HigherOrderTypeSignature {
+    /// The acceptable signature and coercions rules are special for this
+    /// function.
+    ///
+    /// If this signature is specified,
+    /// DataFusion will call [`HigherOrderUDF::coerce_value_types`] to prepare 
argument types.
+    UserDefined,
+    /// One or more lambdas or arguments with arbitrary types
+    VariadicAny,
+    /// The specified number of lambdas or arguments with arbitrary types.
+    Any(usize),
+}
+
+/// Provides information necessary for calling a higher order function.
+///
+/// - [`HigherOrderTypeSignature`] defines the argument types that a function
+///   has implementations for.
+///
+/// - [`Volatility`] defines how the output of the function changes with the
+///   input.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
+pub struct HigherOrderSignature {
+    /// The data types that the function accepts. See
+    /// [HigherOrderTypeSignature] for more information.
+    pub type_signature: HigherOrderTypeSignature,
+    /// The volatility of the function. See [Volatility] for more information.
+    pub volatility: Volatility,
+    /// Whether [HigherOrderUDF::coerce_values_for_lambdas] should be called
+    pub coerce_values_for_lambdas: bool,
+}
+
+impl HigherOrderSignature {
+    /// Creates a new `HigherOrderSignature` from a given type signature and
+    /// volatility.
+    ///
+    /// `coerce_values_for_lambdas` starts as `false`; use
+    /// [Self::with_coerce_values_for_lambdas] to enable it.
+    pub fn new(type_signature: HigherOrderTypeSignature, volatility: Volatility) -> Self {
+        HigherOrderSignature {
+            type_signature,
+            volatility,
+            coerce_values_for_lambdas: false,
+        }
+    }
+
+    /// User-defined coercion rules for the function.
+    pub fn user_defined(volatility: Volatility) -> Self {
+        Self {
+            type_signature: HigherOrderTypeSignature::UserDefined,
+            volatility,
+            coerce_values_for_lambdas: false,
+        }
+    }
+
+    /// An arbitrary number of lambdas or arguments of any type.
+    pub fn variadic_any(volatility: Volatility) -> Self {
+        Self {
+            type_signature: HigherOrderTypeSignature::VariadicAny,
+            volatility,
+            coerce_values_for_lambdas: false,
+        }
+    }
+
+    /// A specified number of lambdas or arguments of any type.
+    pub fn any(arg_count: usize, volatility: Volatility) -> Self {
+        Self {
+            type_signature: HigherOrderTypeSignature::Any(arg_count),
+            volatility,
+            coerce_values_for_lambdas: false,
+        }
+    }
+
+    /// Set [Self::coerce_values_for_lambdas] to true to indicate that
+    /// [HigherOrderUDF::coerce_values_for_lambdas] should be called.
+    ///
+    /// Consumes `self` and returns the updated signature, so it can be
+    /// chained after any of the constructors above.
+    pub fn with_coerce_values_for_lambdas(mut self) -> Self {
+        self.coerce_values_for_lambdas = true;
+
+        self
+    }
+}
+
+// Equality between two `dyn HigherOrderUDF` trait objects is delegated to the
+// object's own `dyn_eq` implementation.
+impl PartialEq for dyn HigherOrderUDF {
+    fn eq(&self, other: &Self) -> bool {
+        // `as _` lets the compiler pick the parameter type `dyn_eq` expects
+        // (presumably `&dyn Any`, given the `DynEq` import; confirm).
+        self.dyn_eq(other as _)
+    }
+}
+
+impl PartialOrd for dyn HigherOrderUDF {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let mut cmp = self.name().cmp(other.name());
+        if cmp == Ordering::Equal {
+            cmp = self.signature().partial_cmp(other.signature())?;
+        }
+        if cmp == Ordering::Equal {
+            cmp = self.aliases().partial_cmp(other.aliases())?;
+        }
+        // Contract for PartialOrd and PartialEq consistency requires that
+        // a == b if and only if partial_cmp(a, b) == Some(Equal).
+        if cmp == Ordering::Equal && self != other {
+            // Functions may have other properties besides name and signature
+            // that differentiate two instances (e.g. type, or arbitrary 
parameters).
+            // We cannot return Some(Equal) in such case.
+            return None;
+        }
+        debug_assert!(
+            cmp == Ordering::Equal || self != other,
+            "Detected incorrect implementation of PartialEq when comparing 
functions: '{}' and '{}'. \
+            The functions compare as equal, but they are not equal based on 
general properties that \
+            the PartialOrd implementation observes,",
+            self.name(),
+            other.name()
+        );
+        Some(cmp)
+    }
+}
+
+// Marker impl: declares the `PartialEq` relation on `dyn HigherOrderUDF`
+// to be a full equivalence relation.
+impl Eq for dyn HigherOrderUDF {}
+
+// Hashing of a `dyn HigherOrderUDF` trait object is delegated to the object's
+// own `dyn_hash` implementation.
+impl Hash for dyn HigherOrderUDF {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.dyn_hash(state)
+    }
+}
+
+/// Arguments passed to [`HigherOrderUDF::invoke_with_args`] when invoking a
+/// higher order function.
+#[derive(Debug, Clone)]
+pub struct HigherOrderFunctionArgs {
+    /// The evaluated arguments and lambdas to the function.
+    ///
+    /// Each entry is either an evaluated value ([`ColumnarValue`]) or a
+    /// lambda ([`LambdaArgument`]).
+    pub args: Vec<ValueOrLambda<ColumnarValue, LambdaArgument>>,
+    /// Field associated with each arg, if it exists.
+    ///
+    /// For lambdas, it will be the field of the result of
+    /// the lambda if evaluated with the parameters
+    /// returned from [`HigherOrderUDF::lambda_parameters`].
+    pub arg_fields: Vec<ValueOrLambda<FieldRef, FieldRef>>,
+    /// The number of rows in the record batch being evaluated.
+    pub number_rows: usize,
+    /// The return field of the higher order function, as returned
+    /// (from `return_field_from_args`) when creating the
+    /// physical expression from the logical expression.
+    pub return_field: FieldRef,
+    /// The config options at execution time.
+    pub config_options: Arc<ConfigOptions>,
+}
+
+impl HigherOrderFunctionArgs {
+    /// The return type of the function, i.e. the [`DataType`] of
+    /// [`Self::return_field`]. See [`Self::return_field`] for more
+    /// details.
+    pub fn return_type(&self) -> &DataType {
+        self.return_field.data_type()
+    }
+}
+
+/// A lambda argument to a HigherOrderFunction
+#[derive(Clone, Debug)]
+pub struct LambdaArgument {
+    /// The parameters defined in this lambda
+    ///
+    /// For example, for `array_transform([2], v -> -v)`,
+    /// this will be `vec![Field::new("v", DataType::Int32, true)]`
+    params: Vec<FieldRef>,
+    /// The body of the lambda
+    ///
+    /// For example, for `array_transform([2], v -> -v)`,
+    /// this will be the physical expression of `-v`
+    body: Arc<dyn PhysicalExpr>,
+    /// A RecordBatch with the captured columns inside the lambda body, if any
+    ///
+    /// For example, for `array_transform([2], v -> v + a + b)`,
+    /// this will be a `RecordBatch` with columns `a` and `b`
+    captures: Option<RecordBatch>,

Review Comment:
   I agree, reverted at 
https://github.com/apache/datafusion/pull/21679/changes/9cb4882b6780214a2af999dfee0c2984eee56285
   cc @LiaCastaneda 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to