Fokko commented on code in PR #1865:
URL: https://github.com/apache/iceberg-python/pull/1865#discussion_r2024396389


##########
tests/io/test_pyarrow.py:
##########
@@ -2317,3 +2321,66 @@ def test_pyarrow_io_multi_fs() -> None:
 
         # Same PyArrowFileIO instance resolves local file input to 
LocalFileSystem
         assert 
isinstance(pyarrow_file_io.new_input("file:///path/to/file")._filesystem, 
LocalFileSystem)
+
+
+def test_scan_nulls(catalog: InMemoryCatalog, arrow_table_with_null: pa.Table) 
-> None:
+    import pyarrow.compute as pc
+
+    catalog.create_namespace("default")
+    table = catalog.create_table(
+        "default.test_scan_nulls",
+        schema=arrow_table_with_null.schema,
+    )
+    table.append(arrow_table_with_null)
+
+    # "string": ["a", None, "z"]
+    assert len(table.scan(row_filter="string is null").to_arrow()) == 1
+    assert len(table.scan(row_filter=IsNull("string")).to_arrow()) == 1
+    assert len(table.scan().to_arrow().filter(pc.field("string").is_null())) 
== 1
+
+    assert len(table.scan(row_filter="string is not null").to_arrow()) == 2
+    assert len(table.scan(row_filter=NotNull("string")).to_arrow()) == 2
+    assert len(table.scan().to_arrow().filter(pc.field("string").is_valid())) 
== 2
+
+    assert len(table.scan(row_filter="string == 'a'").to_arrow()) == 1
+    assert len(table.scan(row_filter=EqualTo(term="string", 
literal=("a"))).to_arrow()) == 1
+    assert len(table.scan().to_arrow().filter(pc.field("string") == "a")) == 1
+
+    # this should be 2
+    assert len(table.scan(row_filter="string != 'a'").to_arrow()) == 1
+    assert len(table.scan(row_filter=NotEqualTo(term="string", 
literal=("a"))).to_arrow()) == 1
+    assert len(table.scan(row_filter=Not(EqualTo(term="string", 
literal=("a")))).to_arrow()) == 1
+    assert len(table.scan().to_arrow().filter(pc.field("string") != "a")) == 1

Review Comment:
   Another option is:
   
   ```python
   table.scan().to_arrow().filter(pc.coalesce(pc.field("string") != "a", 
pc.scalar(False)))
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to