kevinjqliu commented on code in PR #1925: URL: https://github.com/apache/iceberg-python/pull/1925#discussion_r2047430938
########## pyiceberg/io/pyarrow.py: ########## @@ -2241,29 +2241,36 @@ def _partition_value(self, partition_field: PartitionField, schema: Schema) -> A if partition_field.source_id not in self.column_aggregates: return None - if not partition_field.transform.preserves_order: + source_field = schema.find_field(partition_field.source_id) + iceberg_transform = partition_field.transform + + if not iceberg_transform.preserves_order: raise ValueError( f"Cannot infer partition value from parquet metadata for a non-linear Partition Field: {partition_field.name} with transform {partition_field.transform}" ) - lower_value = partition_record_value( - partition_field=partition_field, - value=self.column_aggregates[partition_field.source_id].current_min, - schema=schema, + transform_func = iceberg_transform.transform(source_field.field_type) + + lower_value = transform_func( + partition_record_value( + partition_field=partition_field, + value=self.column_aggregates[partition_field.source_id].current_min, + schema=schema, + ) ) - upper_value = partition_record_value( - partition_field=partition_field, - value=self.column_aggregates[partition_field.source_id].current_max, - schema=schema, + upper_value = transform_func( + partition_record_value( + partition_field=partition_field, + value=self.column_aggregates[partition_field.source_id].current_max, + schema=schema, + ) ) if lower_value != upper_value: raise ValueError( f"Cannot infer partition value from parquet metadata as there are more than one partition values for Partition Field: {partition_field.name}. 
{lower_value=}, {upper_value=}" ) - source_field = schema.find_field(partition_field.source_id) - transform = partition_field.transform.transform(source_field.field_type) - return transform(lower_value) Review Comment: ah, the bug was introduced [here](https://github.com/apache/iceberg-python/pull/1555/files#diff-8d5e63f2a87ead8cebe2fd8ac5dcf2198d229f01e16bb9e06e21f7277c328abdR2179-R2182); the values need to be transformed first, before comparison -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org