mike-luabase commented on issue #208: URL: https://github.com/apache/iceberg-python/issues/208#issuecomment-2156618883
Here's what I've been trying (sorry for the long example, but I thought the context would help): ```python iowa_sales_df = pcsv.read_csv("/Users/mritchie712/blackbird/demoData/Iowa_Liquor_Sales_20240607.csv") date_column = pc.strptime(iowa_sales_df['date'], format='%m/%d/%Y', unit='us') # Replace the old 'date' column with the new date column iowa_sales_df = iowa_sales_df.set_column(iowa_sales_df.schema.get_field_index('date'), 'date', date_column) iceberg_schema = Schema( NestedField(1, "invoice_item_number", StringType(), required=True), NestedField(2, "date", TimestampType(), required=True), NestedField(3, "store_number", LongType(), required=True), NestedField(4, "store_name", StringType(), required=True), NestedField(5, "address", StringType(), required=True), NestedField(6, "city", StringType(), required=True), NestedField(7, "zip_code", StringType(), required=True), NestedField(8, "store_location", StringType(), required=True), NestedField(9, "county_number", LongType(), required=True), NestedField(10, "county", StringType(), required=True), NestedField(11, "category", LongType(), required=True), NestedField(12, "category_name", StringType(), required=True), NestedField(13, "vendor_number", LongType(), required=True), NestedField(14, "vendor_name", StringType(), required=True), NestedField(15, "item_number", StringType(), required=True), NestedField(16, "item_description", StringType(), required=True), NestedField(17, "pack", LongType(), required=True), NestedField(18, "bottle_volume_ml", LongType(), required=True), NestedField(19, "state_bottle_cost", DoubleType(), required=True), NestedField(20, "state_bottle_retail", DoubleType(), required=True), NestedField(21, "bottles_sold", LongType(), required=True), NestedField(22, "sale_dollars", DoubleType(), required=True), NestedField(23, "volume_sold_liters", DoubleType(), required=True), NestedField(24, "volume_sold_gallons", DoubleType(), required=True) ) from pyiceberg.partitioning import PartitionSpec, 
PartitionField from pyiceberg.transforms import DayTransform PARTITION_FIELD_ID_START = 1000 source_id = iowa_sales_df.schema.get_field_index('date') partition_spec = PartitionSpec( PartitionField( source_id=source_id, # ID of the "date" field in the schema (0-based index) field_id=PARTITION_FIELD_ID_START, # Unique ID for the partition field, starting from 1000 transform=DayTransform(), name="date_day" ) ) table = catalog.create_table( "default.iowa_liquor_sales", schema=iceberg_schema, partition_spec=partition_spec, ) table.append(iowa_sales_df) len(table.scan().to_arrow()) ``` ``` { "name": "ValueError", "message": "Cannot write to partitioned tables", "stack": "--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[17], line 1 ----> 1 table.overwrite(iowa_sales_df) 2 # table.append(iowa_sales_df) 3 len(table.scan().to_arrow()) File ~/blackbird/notebooks/.venv/lib/python3.12/site-packages/pyiceberg/table/__init__.py:1094, in Table.overwrite(self, df, overwrite_filter) 1091 raise NotImplementedError(\"Cannot overwrite a subset of a table\") 1093 if len(self.spec().fields) > 0: -> 1094 raise ValueError(\"Cannot write to partitioned tables\") 1096 from pyiceberg.io.pyarrow import schema_to_pyarrow 1098 _check_schema_compatible(self.schema(), other_schema=df.schema) ValueError: Cannot write to partitioned tables" } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org