jqin61 commented on code in PR #453: URL: https://github.com/apache/iceberg-python/pull/453#discussion_r1506256332
########## pyiceberg/partitioning.py: ########## @@ -215,3 +246,59 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre ) ) return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID) + + +@dataclass(frozen=True) +class PartitionFieldValue: + field: PartitionField + value: Any + + +@dataclass(frozen=True) +class PartitionKey: + raw_partition_field_values: List[PartitionFieldValue] + partition_spec: PartitionSpec + schema: Schema + + @cached_property + def partition(self) -> Record: # partition key transformed with iceberg internal representation as input + iceberg_typed_key_values = {} + for raw_partition_field_value in self.raw_partition_field_values: + partition_fields = self.partition_spec.source_id_to_fields_map[raw_partition_field_value.field.source_id] + if len(partition_fields) != 1: + raise ValueError("partition_fields must contain exactly one field.") + partition_field = partition_fields[0] + iceberg_type = self.schema.find_field(name_or_id=raw_partition_field_value.field.source_id).field_type + iceberg_typed_value = _to_iceberg_internal_representation(iceberg_type, raw_partition_field_value.value) + transformed_value = partition_field.transform.transform(iceberg_type)(iceberg_typed_value) + iceberg_typed_key_values[partition_field.name] = transformed_value + return Record(**iceberg_typed_key_values) + + def to_path(self) -> str: + return self.partition_spec.partition_to_path(self.partition, self.schema) + + +@singledispatch +def _to_iceberg_internal_representation(type: IcebergType, value: Any) -> Any: Review Comment: i checked this line after spark write into iceberg. ```snapshot.manifests(iceberg_table.io)[0].fetch_manifest_entry(iceberg_table.io)[0].data_file.partition``` and get ```spark_partition_for_justification=Record[uuid_field='f47ac10b-58cc-4372-a567-0e02b2c3d479']``` so looks like it is string representation in the data_file.partition? ########## pyiceberg/partitioning.py: ########## @@ -215,3 +246,59 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre ) ) return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID) + + +@dataclass(frozen=True) +class PartitionFieldValue: + field: PartitionField + value: Any + + +@dataclass(frozen=True) +class PartitionKey: + raw_partition_field_values: List[PartitionFieldValue] + partition_spec: PartitionSpec + schema: Schema + + @cached_property + def partition(self) -> Record: # partition key transformed with iceberg internal representation as input + iceberg_typed_key_values = {} + for raw_partition_field_value in self.raw_partition_field_values: + partition_fields = self.partition_spec.source_id_to_fields_map[raw_partition_field_value.field.source_id] + if len(partition_fields) != 1: + raise ValueError("partition_fields must contain exactly one field.") + partition_field = partition_fields[0] + iceberg_type = self.schema.find_field(name_or_id=raw_partition_field_value.field.source_id).field_type + iceberg_typed_value = _to_iceberg_internal_representation(iceberg_type, raw_partition_field_value.value) + transformed_value = partition_field.transform.transform(iceberg_type)(iceberg_typed_value) + iceberg_typed_key_values[partition_field.name] = transformed_value + return Record(**iceberg_typed_key_values) + + def to_path(self) -> str: + return self.partition_spec.partition_to_path(self.partition, self.schema) + + +@singledispatch +def _to_iceberg_internal_representation(type: IcebergType, value: Any) -> Any: Review Comment: I checked this line after spark write into iceberg. ```snapshot.manifests(iceberg_table.io)[0].fetch_manifest_entry(iceberg_table.io)[0].data_file.partition``` and get ```spark_partition_for_justification=Record[uuid_field='f47ac10b-58cc-4372-a567-0e02b2c3d479']``` so looks like it is string representation in the data_file.partition? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org