Copilot commented on code in PR #807:
URL: https://github.com/apache/sedona-db/pull/807#discussion_r3176239663


##########
python/sedonadb/src/expr.rs:
##########
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_common::{Column, ScalarValue};
+use datafusion_expr::{
+    expr::{FieldMetadata, InList},
+    Cast, Expr,
+};
+use pyo3::prelude::*;
+
+use crate::error::PySedonaError;
+use crate::import_from::{import_arrow_array, import_arrow_field};
+
+#[pyclass(name = "InternalExpr")]
+#[derive(Clone)]
+pub struct PyExpr {
+    pub inner: Expr,
+}
+
+impl PyExpr {
+    pub fn new(inner: Expr) -> Self {
+        Self { inner }
+    }
+}
+
+#[pymethods]
+impl PyExpr {
+    fn __repr__(&self) -> String {
+        format!("{}", self.inner)
+    }
+
+    fn debug_string(&self) -> String {
+        format!("{:?}", self.inner)
+    }
+
+    fn variant_name(&self) -> String {
+        self.inner.variant_name().to_string()
+    }
+
+    fn alias(&self, name: &str) -> Result<Self, PySedonaError> {
+        let inner = self.inner.clone().alias_if_changed(name.to_string())?;
+        Ok(Self { inner })
+    }
+
+    fn cast(&self, target: Bound<'_, PyAny>) -> Result<Self, PySedonaError> {
+        let field = import_arrow_field(&target)?;
+        if let Some(type_name) = field.extension_type_name() {
+            return Err(PySedonaError::SedonaPython(format!(
+                "Can't cast to Arrow extension type '{type_name}'"
+            )));
+        }
+        let inner = Expr::Cast(Cast::new(
+            Box::new(self.inner.clone()),
+            field.data_type().clone(),
+        ));
+        Ok(Self { inner })
+    }
+
+    fn is_null(&self) -> Self {
+        Self {
+            inner: Expr::IsNull(Box::new(self.inner.clone())),
+        }
+    }
+
+    fn is_not_null(&self) -> Self {
+        Self {
+            inner: Expr::IsNotNull(Box::new(self.inner.clone())),
+        }
+    }
+
+    #[pyo3(signature = (values, negated=false))]
+    fn isin(&self, values: Vec<PyRef<'_, PyExpr>>, negated: bool) -> Self {
+        let list = values.iter().map(|e| e.inner.clone()).collect();
+        Self {
+            inner: Expr::InList(InList::new(Box::new(self.inner.clone()), list, negated)),
+        }
+    }
+
+    fn negate(&self) -> Self {
+        Self {
+            inner: Expr::Negative(Box::new(self.inner.clone())),
+        }
+    }
+}
+
+#[pyfunction]
+pub fn expr_col(name: &str) -> PyExpr {
+    PyExpr {
+        inner: Expr::Column(Column::new_unqualified(name)),
+    }
+}
+
+#[pyfunction]
+pub fn expr_lit(obj: Bound<'_, PyAny>) -> Result<PyExpr, PySedonaError> {
+    let (field, array) = import_arrow_array(&obj)?;
+    if array.len() != 1 {
+        return Err(PySedonaError::SedonaPython(format!(
+            "Expected literal Arrow array of length 1, got length {}",
+            array.len()
+        )));
+    }
+    let scalar_value = ScalarValue::try_from_array(&array, 0)?;
+    let metadata = if field.metadata().is_empty() {
+        None
+    } else {
+        Some(FieldMetadata::new_from_field(&field))
+    };
+    let inner = Expr::Literal(scalar_value, metadata);
+    Ok(PyExpr { inner })

Review Comment:
   `expr_lit()` duplicates the existing Arrow-scalar import/coercion logic in 
`import_from::import_arrow_scalar` (length==1 check, ScalarValue conversion, 
FieldMetadata handling). Consider reusing `import_arrow_scalar` here so 
metadata/extension handling and error messages stay consistent across the 
codebase and future fixes only need to happen in one place.



##########
python/sedonadb/python/sedonadb/expr/expression.py:
##########
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Iterable
+
+from sedonadb._lib import expr_col as _expr_col
+from sedonadb._lib import expr_lit as _expr_lit
+from sedonadb.expr.literal import Literal
+
+
+class Expr:
+    """A column expression.
+
+    `Expr` represents a logical expression that will be evaluated against a
+    `DataFrame` when the frame is executed. Expressions are pure syntax — they
+    do not carry data and are not bound to a particular frame at construction
+    time. Errors such as referring to a column that does not exist surface only
+    when the expression is consumed (for example, by `DataFrame.select()` or
+    `DataFrame.filter()`).
+
+    Construct an `Expr` with `col(name)` or `lit(value)`.
+    """
+
+    __slots__ = ("_impl",)
+
+    def __init__(self, impl):
+        # impl is the underlying _lib.InternalExpr handle. Users normally
+        # do not construct Expr directly; use col() / lit() instead.
+        self._impl = impl
+
+    def __repr__(self) -> str:
+        return f"Expr({self._impl!r})"
+
+    def alias(self, name: str) -> "Expr":
+        """Return a copy of the expression with a new output name."""
+        return Expr(self._impl.alias(name))
+
+    def cast(self, target) -> "Expr":
+        """Cast the expression to the given Arrow type.
+
+        `target` must be an object exposing the Arrow C schema interface
+        (e.g. `pyarrow.int64()`, `pyarrow.string()`, a `pyarrow.Field`, or any
+        object with `__arrow_c_schema__`). Casting to Arrow extension types is
+        not supported.
+        """
+        return Expr(self._impl.cast(target))
+
+    def is_null(self) -> "Expr":
+        """Return a boolean expression that is true where this expression is
+        null (matches both SQL NULL and floating-point NaN)."""

Review Comment:
   The `Expr.is_null()` docstring says it matches both SQL NULL and
floating-point NaN, but the underlying Rust implementation builds DataFusion
`Expr::IsNull` (rendered as `IS NULL`), which checks for NULL only; NaN is not
NULL in SQL. Please adjust the docstring so it does not promise NaN semantics
(or add a separate NaN-aware helper if that behavior is intended).
   



##########
python/sedonadb/tests/expr/test_expression.py:
##########
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow as pa
+import pytest
+
+from sedonadb.expr import Expr, col, lit
+
+
+def test_col_returns_expr():
+    e = col("x")
+    assert isinstance(e, Expr)
+    assert e._impl.variant_name() == "Column"
+    assert "x" in repr(e)
+
+
+def test_lit_from_python_scalar():
+    e = lit(5)
+    assert isinstance(e, Expr)
+    assert e._impl.variant_name() == "Literal"
+    assert "Int64(5)" in repr(e)
+
+
+def test_lit_passthrough_for_existing_expr():
+    e = col("x")
+    assert lit(e) is e
+
+
+def test_lit_from_pyarrow_scalar():
+    arr = pa.array([42])
+    e = lit(arr[0])
+    assert "Int64(42)" in repr(e)
+
+
+def test_lit_from_string():
+    assert "Utf8" in repr(lit("hello"))
+
+
+def test_lit_from_none():
+    e = lit(None)
+    assert "Null" in repr(e) or "NULL" in repr(e)
+
+
+def test_alias():
+    e = col("x").alias("y")
+    assert "x AS y" in repr(e)
+
+
+def test_alias_chain():
+    e = col("x").alias("a").alias("b")
+    # Either nested or last-wins; both encode the user intent.
+    assert "b" in repr(e)
+
+
+def test_cast_to_arrow_type():
+    e = col("x").cast(pa.int32())
+    assert "CAST(x AS Int32)" in repr(e)

Review Comment:
   These tests assert exact substrings of DataFusion's `Expr` Display 
formatting (e.g., `CAST(x AS Int32)`), which can change between DataFusion 
versions without any semantic change and make the suite brittle. Prefer 
asserting on `variant_name()` (and/or other stable properties) instead of exact 
formatting when the goal is to validate expression structure.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to