kevinjqliu commented on issue #992: URL: https://github.com/apache/iceberg-python/issues/992#issuecomment-2266221072
Here's what I ran based on the video; I cannot reproduce the issue using 0.7.0.

In Jupyter,
```
!pip install pyiceberg==0.7.0 --force -q
```

With pyiceberg integration docker running, `docker compose -f dev/docker-compose-integration.yml up -d`,
```
from pyiceberg.catalog import load_catalog
import pyarrow as pa
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, StringType
from pyiceberg.expressions import EqualTo

catalog = load_catalog(
    "demo",
    **{
        "type": "rest",
        "uri": "http://localhost:8181",
        "s3.endpoint": "http://localhost:9000",
        "s3.access-key-id": "admin",
        "s3.secret-access-key": "password",
    },
)
catalog.create_namespace_if_not_exists("default")

schema = Schema(
    NestedField(field_id=1, name="table_id", field_type=StringType(), required=True),
    NestedField(field_id=2, name="name", field_type=StringType(), required=True),
    NestedField(field_id=3, name="dataset", field_type=StringType(), required=True),
    NestedField(
        field_id=4, name="description", field_type=StringType(), required=False
    ),
    identifier_field_ids=[1],
)

try:
    catalog.purge_table("default.some_table")
except:
    pass

table = catalog.create_table("default.some_table", schema=schema)

schema = pa.schema([
    ("table_id", pa.string(), False),
    ("name", pa.string(), False),
    ("dataset", pa.string(), False),
    ("description", pa.string(), True)
])

# Create data arrays
data = [
    ['id_1', 'id_2', 'id_3', 'id_4'],
    ['name_1', 'name_2', 'name_3', 'name_4'],
    ['dataset_1', 'dataset_1', 'dataset_2', 'dataset_3'],
    ['desc_1', 'desc_2', 'desc_3', 'desc_4']
]

# Create PyArrow Table
table_data = pa.Table.from_arrays(data, schema=schema)

table = catalog.load_table("default.some_table")
table.append(table_data)

from pyiceberg.expressions import BooleanExpression, And, EqualTo, AlwaysTrue

def build_row_filter(equality_key_value: dict) -> BooleanExpression:
    if not equality_key_value or len(equality_key_value) == 0:
        return AlwaysTrue
    else:
        expressions: list[BooleanExpression] = []
        for key in equality_key_value:
            expressions.append(EqualTo(key, equality_key_value[key]))

        if len(expressions) == 1:
            return expressions[0]
        else:
            return And(*expressions)

requested_columns = ["*"]
scan = table.scan(
    selected_fields=(requested_columns),
    row_filter=build_row_filter({
        "dataset": "dataset_1",
    })
)
scan.to_pandas()
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org