nvartolomei commented on issue #2123:
URL:
https://github.com/apache/iceberg-python/issues/2123#issuecomment-3137979451
Maybe it is fixed in the main branch?
```toml
[project]
name = "pyiceberg-example"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.9"
dependencies = [
"pyiceberg-core>=0.5.1",
"pyiceberg[pyarrow,sql-sqlite]>=0.9.1",
]
```
```py
#!/usr/bin/env python3
import os
from datetime import datetime
import pyarrow as pa
from pyiceberg.catalog import load_catalog
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import NestedField, Schema
from pyiceberg.transforms import IdentityTransform
from pyiceberg.types import StringType
def local_catalog():
    """Return a SQL catalog whose warehouse lives under the current directory.

    A ``warehouse`` directory is created in the current working directory
    (if it is not already there) and handed to ``load_catalog`` together
    with a SQLite connection URI.
    """
    warehouse_dir = os.path.join(os.getcwd(), "warehouse")
    os.makedirs(warehouse_dir, exist_ok=True)

    # Catalog properties are collected first so the call below stays short.
    catalog_properties = {
        "type": "sql",
        "uri": "sqlite:///iceberg_catalog.db",
        "warehouse": warehouse_dir,
    }
    return load_catalog("local_catalog", **catalog_properties)
def main():
    """Create a table partitioned on an emoji-named column and append rows.

    Steps, in order:

    1. Open the local catalog and make sure the ``default`` namespace exists.
    2. Create ``default.nested_table`` with two string columns, the second
       of which is named ``😎``, partitioned by identity on that column.
    3. Print the resulting partition spec.
    4. Append two rows built from Python dicts.
    """
    catalog = local_catalog()

    # Create namespace if it doesn't exist
    if ("default",) not in catalog.list_namespaces():
        catalog.create_namespace("default")

    # Table identifier
    table_identifier = "default.nested_table"

    # Two flat string columns; the optional one has a non-ASCII (emoji) name,
    # which is what this reproduction is exercising.
    schema = Schema(
        NestedField(id=1, name="name", field_type=StringType(), required=True),
        NestedField(id=2, name="😎", field_type=StringType(), required=False),
    )

    # Identity partitioning on the emoji column (source_id=2), with the
    # partition field carrying the same emoji name.
    partition_spec = PartitionSpec(
        PartitionField(
            source_id=2,
            field_id=1001,
            transform=IdentityTransform(),
            name="😎",
        )
    )

    table = catalog.create_table(
        identifier=table_identifier,
        schema=schema,
        partition_spec=partition_spec,
    )

    # The message is a single logical line; it is split into two adjacent
    # f-string literals only to keep the source line short.
    print(
        f"Table {table.name()} created successfully with partition spec: "
        f"{table.spec()}"
    )

    # NOTE(review): the rows carry a "timestamp" key that is not part of the
    # table schema; presumably it is ignored because an explicit schema is
    # passed to from_pylist -- confirm. Kept so the reproduction is unchanged.
    rows = [
        {
            "name": "Foo",
            "😎": "Cool Foo",
            "timestamp": datetime(2023, 10, 1, 12, 0, 0),
        },
        {
            "name": "Bar",
            "😎": "Cool Bar",
            "timestamp": datetime(2023, 10, 2, 14, 30, 0),
        },
    ]
    table.append(pa.Table.from_pylist(rows, schema=table.schema().as_arrow()))
# Run the reproduction only when executed as a script, not on import.
if __name__ == "__main__":
    main()
```
```sh
duckdb --version
# v1.3.1 (Ossivalis) 2063dda3e6
uv run nested_partition_field.py
# Table ('default', 'nested_table') created successfully with partition spec: [
# 1000: 😎: identity(2)
# ]
# Location /home/nv/src/pyiceberg-example/warehouse/default.db/nested_table
# Writing file to /home/nv/src/pyiceberg-example/warehouse/default.db/nested_table/data/%F0%9F%98%8E=Cool+Bar/00000-1-621d9a51-3b81-4c83-8a23-5af2c568c6d1.parquet
# Writing file to /home/nv/src/pyiceberg-example/warehouse/default.db/nested_table/data/%F0%9F%98%8E=Cool+Foo/00000-0-621d9a51-3b81-4c83-8a23-5af2c568c6d1.parquet
# Feed the newest metadata JSON file (reverse sort, first entry) to DuckDB's iceberg_scan.
printf "SELECT * FROM iceberg_scan('%s')" \
  $(ls --color=never warehouse/default.db/nested_table/metadata/*.json | sort -nr | head -n1) |
  duckdb
# Invalid Input Error:
# Cannot parse file header: Invalid Avro identifier
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]