kevinjqliu commented on code in PR #1865:
URL: https://github.com/apache/iceberg-python/pull/1865#discussion_r2021452599


##########
tests/io/test_pyarrow.py:
##########
@@ -2317,3 +2321,66 @@ def test_pyarrow_io_multi_fs() -> None:
 
         # Same PyArrowFileIO instance resolves local file input to 
LocalFileSystem
         assert 
isinstance(pyarrow_file_io.new_input("file:///path/to/file")._filesystem, 
LocalFileSystem)
+
+
+def test_scan_nulls(catalog: InMemoryCatalog, arrow_table_with_null: pa.Table) 
-> None:
+    import pyarrow.compute as pc
+
+    catalog.create_namespace("default")
+    table = catalog.create_table(
+        "default.test_scan_nulls",
+        schema=arrow_table_with_null.schema,
+    )
+    table.append(arrow_table_with_null)
+
+    # "string": ["a", None, "z"]
+    assert len(table.scan(row_filter="string is null").to_arrow()) == 1
+    assert len(table.scan(row_filter=IsNull("string")).to_arrow()) == 1
+    assert len(table.scan().to_arrow().filter(pc.field("string").is_null())) 
== 1
+
+    assert len(table.scan(row_filter="string is not null").to_arrow()) == 2
+    assert len(table.scan(row_filter=NotNull("string")).to_arrow()) == 2
+    assert len(table.scan().to_arrow().filter(pc.field("string").is_valid())) 
== 2
+
+    assert len(table.scan(row_filter="string == 'a'").to_arrow()) == 1
+    assert len(table.scan(row_filter=EqualTo(term="string", 
literal=("a"))).to_arrow()) == 1
+    assert len(table.scan().to_arrow().filter(pc.field("string") == "a")) == 1
+
+    # this should be 2
+    assert len(table.scan(row_filter="string != 'a'").to_arrow()) == 1
+    assert len(table.scan(row_filter=NotEqualTo(term="string", 
literal=("a"))).to_arrow()) == 1
+    assert len(table.scan(row_filter=Not(EqualTo(term="string", 
literal=("a")))).to_arrow()) == 1
+    assert len(table.scan().to_arrow().filter(pc.field("string") != "a")) == 1
+
+
+def test_scan_kleene(catalog: InMemoryCatalog, arrow_table_with_null: 
pa.Table) -> None:
+    catalog.create_namespace("default")
+    table = catalog.create_table(
+        "default.test_scan_nulls",
+        schema=arrow_table_with_null.schema,
+    )
+    table.append(arrow_table_with_null)
+
+    # "string": ["a", None, "z"]
+    assert len(table.scan(row_filter="string is null OR string = 
'a'").to_arrow()) == 2  # {null, a}
+    assert len(table.scan(row_filter="string is null AND string = 
'a'").to_arrow()) == 0  # {}
+    assert len(table.scan(row_filter="string is not null OR string = 
'a'").to_arrow()) == 2  # {a, z}
+    assert len(table.scan(row_filter="string is not null AND string = 
'a'").to_arrow()) == 1  # {a}
+
+
+def test_scan_complements(catalog: InMemoryCatalog, arrow_table_with_null: 
pa.Table) -> None:
+    from pyiceberg.expressions.visitors import bind
+    from pyiceberg.io.pyarrow import _expression_to_complementary_pyarrow

Review Comment:
   `_expression_to_complementary_pyarrow` explicitly calls out null handling; see:
   
https://github.com/apache/iceberg-python/blob/d69a19113ea537d16c34b60ab6e69c4285f933c0/pyiceberg/io/pyarrow.py#L883-L886



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to