nvartolomei commented on issue #2123:
URL: https://github.com/apache/iceberg-python/issues/2123#issuecomment-3137979451

   Maybe it is fixed in the main branch?
   
   ```toml
   [project]
   name = "pyiceberg-example"
   version = "0.1.0"
   description = "Add your description here"
   readme = "README.md"
   requires-python = ">=3.9"
   dependencies = [
       "pyiceberg-core>=0.5.1",
       "pyiceberg[pyarrow,sql-sqlite]>=0.9.1",
   ]
   ```
   
   ```py
   #!/usr/bin/env python3
   
   import os
   from datetime import datetime
   
   import pyarrow as pa
   from pyiceberg.catalog import load_catalog
   from pyiceberg.partitioning import PartitionField, PartitionSpec
   from pyiceberg.schema import NestedField, Schema
   from pyiceberg.transforms import IdentityTransform
   from pyiceberg.types import StringType
   
   
   def local_catalog():
       # Create a local catalog
       warehouse_path = os.path.join(os.getcwd(), "warehouse")
       os.makedirs(warehouse_path, exist_ok=True)
   
       return load_catalog(
           "local_catalog",
           **{
               "type": "sql",
               "uri": "sqlite:///iceberg_catalog.db",
               "warehouse": warehouse_path,
           },
       )
   
   
   def main():
       catalog = local_catalog()
   
       # Create namespace if it doesn't exist
       if ("default", ) not in catalog.list_namespaces():
           catalog.create_namespace("default")
   
       # Table identifier
       table_identifier = "default.nested_table"
   
       # Define a schema with a plain string column and an emoji-named string column
       schema = Schema(
           NestedField(id=1, name="name", field_type=StringType(), required=True),
           NestedField(id=2, name="😎", field_type=StringType(), required=False),
       )
   
       partition_spec = PartitionSpec(
           PartitionField(
               source_id=2,
               field_id=1001,
               transform=IdentityTransform(),
               name="😎",
           )
       )
   
       table = catalog.create_table(identifier=table_identifier,
                                    schema=schema,
                                    partition_spec=partition_spec)
   
       print(
           f"Table {table.name()} created successfully with partition spec: {table.spec()}"
       )
   
       # The extra "timestamp" key is not part of the Iceberg schema, so
       # pa.Table.from_pylist drops it when building the Arrow table.
       table.append(
           pa.Table.from_pylist(
               [
                   {"name": "Foo", "😎": "Cool Foo", "timestamp": datetime(2023, 10, 1, 12, 0, 0)},
                   {"name": "Bar", "😎": "Cool Bar", "timestamp": datetime(2023, 10, 2, 14, 30, 0)},
               ],
               schema=table.schema().as_arrow(),
           )
       )
   
   
   if __name__ == "__main__":
       main()
   ```
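   
   As a cross-check, one could read the table back with pyiceberg itself to see whether its own Avro reader accepts the manifests it just wrote; if it does, the failure is specific to DuckDB. A minimal sketch reusing the catalog settings from the script above (untested here, and the file name is arbitrary):
   
   ```py
   #!/usr/bin/env python3
   
   import os
   
   from pyiceberg.catalog import load_catalog
   
   catalog = load_catalog(
       "local_catalog",
       **{
           "type": "sql",
           "uri": "sqlite:///iceberg_catalog.db",
           "warehouse": os.path.join(os.getcwd(), "warehouse"),
       },
   )
   
   table = catalog.load_table("default.nested_table")
   
   # Reading goes through pyiceberg's own Avro reader for the manifest list and
   # manifests, i.e. the same files DuckDB has to parse.
   print(table.scan().to_arrow())
   ```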
   
   ```sh
   duckdb --version
   #  v1.3.1 (Ossivalis) 2063dda3e6
   
   uv run nested_partition_field.py
   # Table ('default', 'nested_table') created successfully with partition spec: [
   #   1000: 😎: identity(2)
   # ]
   # Location /home/nv/src/pyiceberg-example/warehouse/default.db/nested_table
   # Writing file to /home/nv/src/pyiceberg-example/warehouse/default.db/nested_table/data/%F0%9F%98%8E=Cool+Bar/00000-1-621d9a51-3b81-4c83-8a23-5af2c568c6d1.parquet
   # Writing file to /home/nv/src/pyiceberg-example/warehouse/default.db/nested_table/data/%F0%9F%98%8E=Cool+Foo/00000-0-621d9a51-3b81-4c83-8a23-5af2c568c6d1.parquet
   
   printf "SELECT * FROM iceberg_scan('%s')" $(ls --color=never warehouse/default.db/nested_table/metadata/*.json | sort -nr | head -n1) | duckdb
   # Invalid Input Error:
   # Cannot parse file header: Invalid Avro identifier
   ```
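   
   To narrow down which identifier DuckDB is complaining about, one option is to dump the Avro writer schemas of the manifest-list and manifest files under `metadata/`. Presumably the 😎 partition field name ends up in those schemas, but I haven't verified that; this is just a diagnostic sketch and needs fastavro, which is not in the pyproject above:
   
   ```py
   #!/usr/bin/env python3
   
   # Diagnostic sketch: print the writer schema embedded in each Avro file under
   # the table's metadata directory (manifest lists and manifests). Requires
   # fastavro, an extra dependency not listed in the pyproject above.
   
   import glob
   import json
   
   import fastavro
   
   for path in sorted(glob.glob("warehouse/default.db/nested_table/metadata/*.avro")):
       with open(path, "rb") as f:
           schema = fastavro.reader(f).writer_schema
       print(path)
       print(json.dumps(schema, indent=2, ensure_ascii=False, default=str))
   ```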

