CTTY commented on code in PR #2592:
URL: https://github.com/apache/iceberg-rust/pull/2592#discussion_r3406281496


##########
crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs:
##########
@@ -225,17 +225,115 @@ fn to_iceberg_operation(op: Operator) -> 
OpTransformedResult {
 /// identified by name at runtime, so we need to handle them here.
 fn scalar_function_to_iceberg_predicate(func_name: &str, args: &[Expr]) -> 
TransformedResult {
     match func_name {
-        // TODO: support complex expression arguments to scalar functions
-        "isnan" if args.len() == 1 => {
-            let operand = to_iceberg_predicate(&args[0]);
-            match operand {
-                TransformedResult::Column(r) => 
TransformedResult::Predicate(Predicate::Unary(
-                    UnaryExpression::new(PredicateOperator::IsNan, r),
-                )),
-                _ => TransformedResult::NotTransformed,
+        "isnan" if args.len() == 1 => match 
resolve_nan_preserving_reference(&args[0]) {
+            Some(r) => TransformedResult::Predicate(r.is_nan()),
+            None => TransformedResult::NotTransformed,
+        },
+        _ => TransformedResult::NotTransformed,
+    }
+}
+
+/// Attempts to resolve a numeric expression argument down to a single column
+/// [`Reference`] such that `isnan(arg)` is logically equivalent to
+/// `isnan(reference)`.
+///
+/// Filter pushdown is reported as `Inexact` (see
+/// [`IcebergTableProvider::supports_filters_pushdown`]), so DataFusion
+/// re-applies the original predicate after scanning. We therefore only need 
the
+/// pushed-down predicate to be implied by the original filter (it may match
+/// extra rows, but must never drop a matching one). Every transformation 
handled
+/// here preserves NaN-ness *exactly* — the result is NaN if and only if the
+/// wrapped column is NaN — so both `isnan(arg)` and `NOT isnan(arg)` are 
sound:
+///
+/// * negation: `-x` is NaN iff `x` is NaN
+/// * `abs(x)`: `abs(x)` is NaN iff `x` is NaN
+/// * casts between numeric types preserve NaN
+/// * `x + c`, `c + x`, `x - c`, `c - x` for a finite literal `c`
+/// * `x * c`, `c * x`, `x / c` for a finite, non-zero literal `c`
+///
+/// Multiplication/division by zero and `c / x` are intentionally rejected: 
e.g.
+/// `x * 0` is NaN when `x` is `±inf`, so it does not imply `x` is NaN.
+///
+/// [`IcebergTableProvider::supports_filters_pushdown`]: 
crate::table::IcebergTableProvider
+fn resolve_nan_preserving_reference(expr: &Expr) -> Option<Reference> {
+    match expr {
+        Expr::Column(column) => Some(Reference::new(column.name())),
+        Expr::Negative(inner) => resolve_nan_preserving_reference(inner),
+        Expr::Cast(cast) => {
+            // Casts to date truncate the value and are not numeric, so they
+            // cannot be treated as NaN-preserving.
+            if cast.data_type == DataType::Date32 || cast.data_type == 
DataType::Date64 {
+                return None;
             }
+            resolve_nan_preserving_reference(&cast.expr)
         }
-        _ => TransformedResult::NotTransformed,
+        Expr::ScalarFunction(ScalarFunction { func, args })
+            if func.name() == "abs" && args.len() == 1 =>
+        {
+            resolve_nan_preserving_reference(&args[0])
+        }
+        Expr::BinaryExpr(binary) => resolve_nan_preserving_binary(binary),
+        _ => None,
+    }
+}
+
+/// Resolves the column reference from an arithmetic expression that combines a
+/// single column with a finite literal while preserving NaN-ness. See
+/// [`resolve_nan_preserving_reference`] for the soundness argument.
+fn resolve_nan_preserving_binary(binary: &BinaryExpr) -> Option<Reference> {

Review Comment:
   I agree that we don't have to support it in this PR, can we add a note here 
in the comment?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to