kevinjqliu commented on code in PR #2410:
URL: https://github.com/apache/iceberg-python/pull/2410#discussion_r2732822986


##########
pyiceberg/partitioning.py:
##########
@@ -249,6 +250,37 @@ def partition_to_path(self, data: Record, schema: Schema) 
-> str:
         path = "/".join([field_str + "=" + value_str for field_str, value_str 
in zip(field_strs, value_strs, strict=True)])
         return path
 
+    def check_compatible(self, schema: Schema, allow_missing_fields: bool = 
False) -> None:
+        # if the underlying field is dropped, we cannot check they are 
compatible -- continue
+        schema_fields = schema._lazy_id_to_field
+        parents = schema._lazy_id_to_parent
+
+        for field in self.fields:
+            source_field = schema_fields.get(field.source_id)
+
+            if allow_missing_fields and source_field is None:
+                continue
+
+            if isinstance(field.transform, VoidTransform):
+                continue
+
+            if not source_field:
+                raise ValidationError(f"Cannot find source column for 
partition field: {field}")
+
+            source_type = source_field.field_type
+            if not source_type.is_primitive:
+                raise ValidationError(f"Cannot partition by non-primitive 
source field: {source_type}")
+            if not field.transform.can_transform(source_type):
+                raise ValidationError(f"Invalid source type {source_type} for 
transform: {field.transform}")

Review Comment:
   ```suggestion
                   raise ValidationError(
                       f"Invalid source field {source_field.name} with type 
{source_type} "
                       + f"for transform: {field.transform}"
                   )
   ```
   This matches the error message used for the sort order.



##########
pyiceberg/partitioning.py:
##########
@@ -249,6 +250,37 @@ def partition_to_path(self, data: Record, schema: Schema) 
-> str:
         path = "/".join([field_str + "=" + value_str for field_str, value_str 
in zip(field_strs, value_strs, strict=True)])
         return path
 
+    def check_compatible(self, schema: Schema, allow_missing_fields: bool = 
False) -> None:
+        # if the underlying field is dropped, we cannot check they are 
compatible -- continue
+        schema_fields = schema._lazy_id_to_field
+        parents = schema._lazy_id_to_parent
+
+        for field in self.fields:
+            source_field = schema_fields.get(field.source_id)
+
+            if allow_missing_fields and source_field is None:
+                continue
+
+            if isinstance(field.transform, VoidTransform):
+                continue
+
+            if not source_field:
+                raise ValidationError(f"Cannot find source column for 
partition field: {field}")
+
+            source_type = source_field.field_type
+            if not source_type.is_primitive:
+                raise ValidationError(f"Cannot partition by non-primitive 
source field: {source_type}")
+            if not field.transform.can_transform(source_type):
+                raise ValidationError(f"Invalid source type {source_type} for 
transform: {field.transform}")
+
+            # The only valid parent types for a PartitionField are 
StructTypes. This must be checked recursively
+            parent_id = parents.get(field.source_id)
+            while parent_id:
+                parent_type = schema.find_type(parent_id)
+                if not parent_type.is_struct:
+                    raise ValidationError(f"Invalid partition field parent: 
{parent_type}")

Review Comment:
   Maybe include a bit more info in this error message too.



##########
pyiceberg/partitioning.py:
##########
@@ -249,6 +250,37 @@ def partition_to_path(self, data: Record, schema: Schema) 
-> str:
         path = "/".join([field_str + "=" + value_str for field_str, value_str 
in zip(field_strs, value_strs, strict=True)])
         return path
 
+    def check_compatible(self, schema: Schema, allow_missing_fields: bool = 
False) -> None:
+        # if the underlying field is dropped, we cannot check they are 
compatible -- continue
+        schema_fields = schema._lazy_id_to_field
+        parents = schema._lazy_id_to_parent
+
+        for field in self.fields:
+            source_field = schema_fields.get(field.source_id)
+
+            if allow_missing_fields and source_field is None:
+                continue
+
+            if isinstance(field.transform, VoidTransform):
+                continue
+
+            if not source_field:
+                raise ValidationError(f"Cannot find source column for 
partition field: {field}")
+
+            source_type = source_field.field_type
+            if not source_type.is_primitive:
+                raise ValidationError(f"Cannot partition by non-primitive 
source field: {source_type}")

Review Comment:
   ```suggestion
                   raise ValidationError(f"Cannot partition by non-primitive 
source field: {source_field}")
   ```
   This should also include the source_type along with the field name. It 
matches the error message used for the sort order.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to