Fokko commented on code in PR #1865: URL: https://github.com/apache/iceberg-python/pull/1865#discussion_r2024396389
########## tests/io/test_pyarrow.py: ########## @@ -2317,3 +2321,66 @@ def test_pyarrow_io_multi_fs() -> None: # Same PyArrowFileIO instance resolves local file input to LocalFileSystem assert isinstance(pyarrow_file_io.new_input("file:///path/to/file")._filesystem, LocalFileSystem) + + +def test_scan_nulls(catalog: InMemoryCatalog, arrow_table_with_null: pa.Table) -> None: + import pyarrow.compute as pc + + catalog.create_namespace("default") + table = catalog.create_table( + "default.test_scan_nulls", + schema=arrow_table_with_null.schema, + ) + table.append(arrow_table_with_null) + + # "string": ["a", None, "z"] + assert len(table.scan(row_filter="string is null").to_arrow()) == 1 + assert len(table.scan(row_filter=IsNull("string")).to_arrow()) == 1 + assert len(table.scan().to_arrow().filter(pc.field("string").is_null())) == 1 + + assert len(table.scan(row_filter="string is not null").to_arrow()) == 2 + assert len(table.scan(row_filter=NotNull("string")).to_arrow()) == 2 + assert len(table.scan().to_arrow().filter(pc.field("string").is_valid())) == 2 + + assert len(table.scan(row_filter="string == 'a'").to_arrow()) == 1 + assert len(table.scan(row_filter=EqualTo(term="string", literal=("a"))).to_arrow()) == 1 + assert len(table.scan().to_arrow().filter(pc.field("string") == "a")) == 1 + + # this should be 2 + assert len(table.scan(row_filter="string != 'a'").to_arrow()) == 1 + assert len(table.scan(row_filter=NotEqualTo(term="string", literal=("a"))).to_arrow()) == 1 + assert len(table.scan(row_filter=Not(EqualTo(term="string", literal=("a")))).to_arrow()) == 1 + assert len(table.scan().to_arrow().filter(pc.field("string") != "a")) == 1 Review Comment: Another option is: ```python table.scan().to_arrow().filter(pc.coalesce(pc.field("string") != "a", pc.scalar(False))) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org