kevinjqliu commented on code in PR #2410:
URL: https://github.com/apache/iceberg-python/pull/2410#discussion_r2732822986
##########
pyiceberg/partitioning.py:
##########
@@ -249,6 +250,37 @@ def partition_to_path(self, data: Record, schema: Schema)
-> str:
path = "/".join([field_str + "=" + value_str for field_str, value_str
in zip(field_strs, value_strs, strict=True)])
return path
+ def check_compatible(self, schema: Schema, allow_missing_fields: bool =
False) -> None:
+ # if the underlying field is dropped, we cannot check they are
compatible -- continue
+ schema_fields = schema._lazy_id_to_field
+ parents = schema._lazy_id_to_parent
+
+ for field in self.fields:
+ source_field = schema_fields.get(field.source_id)
+
+ if allow_missing_fields and source_field is None:
+ continue
+
+ if isinstance(field.transform, VoidTransform):
+ continue
+
+ if not source_field:
+ raise ValidationError(f"Cannot find source column for
partition field: {field}")
+
+ source_type = source_field.field_type
+ if not source_type.is_primitive:
+ raise ValidationError(f"Cannot partition by non-primitive
source field: {source_type}")
+ if not field.transform.can_transform(source_type):
+ raise ValidationError(f"Invalid source type {source_type} for
transform: {field.transform}")
Review Comment:
```suggestion
raise ValidationError(
f"Invalid source field {source_field.name} with type
{source_type} "
+ f"for transform: {field.transform}"
)
```
This matches the corresponding error message in sort order.
##########
pyiceberg/partitioning.py:
##########
@@ -249,6 +250,37 @@ def partition_to_path(self, data: Record, schema: Schema)
-> str:
path = "/".join([field_str + "=" + value_str for field_str, value_str
in zip(field_strs, value_strs, strict=True)])
return path
+ def check_compatible(self, schema: Schema, allow_missing_fields: bool =
False) -> None:
+ # if the underlying field is dropped, we cannot check they are
compatible -- continue
+ schema_fields = schema._lazy_id_to_field
+ parents = schema._lazy_id_to_parent
+
+ for field in self.fields:
+ source_field = schema_fields.get(field.source_id)
+
+ if allow_missing_fields and source_field is None:
+ continue
+
+ if isinstance(field.transform, VoidTransform):
+ continue
+
+ if not source_field:
+ raise ValidationError(f"Cannot find source column for
partition field: {field}")
+
+ source_type = source_field.field_type
+ if not source_type.is_primitive:
+ raise ValidationError(f"Cannot partition by non-primitive
source field: {source_type}")
+ if not field.transform.can_transform(source_type):
+ raise ValidationError(f"Invalid source type {source_type} for
transform: {field.transform}")
+
+ # The only valid parent types for a PartitionField are
StructTypes. This must be checked recursively
+ parent_id = parents.get(field.source_id)
+ while parent_id:
+ parent_type = schema.find_type(parent_id)
+ if not parent_type.is_struct:
+ raise ValidationError(f"Invalid partition field parent:
{parent_type}")
Review Comment:
Maybe add a bit more info to this error message too.
##########
pyiceberg/partitioning.py:
##########
@@ -249,6 +250,37 @@ def partition_to_path(self, data: Record, schema: Schema)
-> str:
path = "/".join([field_str + "=" + value_str for field_str, value_str
in zip(field_strs, value_strs, strict=True)])
return path
+ def check_compatible(self, schema: Schema, allow_missing_fields: bool =
False) -> None:
+ # if the underlying field is dropped, we cannot check they are
compatible -- continue
+ schema_fields = schema._lazy_id_to_field
+ parents = schema._lazy_id_to_parent
+
+ for field in self.fields:
+ source_field = schema_fields.get(field.source_id)
+
+ if allow_missing_fields and source_field is None:
+ continue
+
+ if isinstance(field.transform, VoidTransform):
+ continue
+
+ if not source_field:
+ raise ValidationError(f"Cannot find source column for
partition field: {field}")
+
+ source_type = source_field.field_type
+ if not source_type.is_primitive:
+ raise ValidationError(f"Cannot partition by non-primitive
source field: {source_type}")
Review Comment:
```suggestion
raise ValidationError(f"Cannot partition by non-primitive
source field: {source_field}")
```
This should also include the source_type along with the name, which matches
the corresponding error message in sort order.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]