Fokko commented on code in PR #8393:
URL: https://github.com/apache/iceberg/pull/8393#discussion_r1306487300


##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
         """Returns the IDs of the current schema."""
         return set(self._name_to_id.values())
 
+    @staticmethod
+    def validate_identifier_field(field_id: int, id_to_field: Dict[int, 
NestedField], id_to_parent: Dict[int, int]) -> None:
+        """Validates that the field with the given ID is a valid identifier 
field.
+
+        Args:
+          field_id: The ID of the field to validate.
+          id_to_field: A map from field IDs to field objects.
+          id_to_parent: A map from field IDs to their parent field IDs.
+
+        Raises:
+          ValueError: If the field is not valid.
+        """
+        field = id_to_field.get(field_id)
+        if field is None:
+            raise ValueError(f"Cannot add fieldId {field_id} as an identifier 
field: field does not exist")

Review Comment:
   ```suggestion
               raise ValueError(f"Identifier field {field_id} invalid: does not 
exist")
   ```



##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
         """Returns the IDs of the current schema."""
         return set(self._name_to_id.values())
 
+    @staticmethod
+    def validate_identifier_field(field_id: int, id_to_field: Dict[int, 
NestedField], id_to_parent: Dict[int, int]) -> None:

Review Comment:
   Why not make this one non-static and private:
   ```suggestion
       def _validate_identifier_field(self, field_id: int) -> None:
   ```
   
   This way we can just call `self.find_field`



##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
         """Returns the IDs of the current schema."""
         return set(self._name_to_id.values())
 
+    @staticmethod
+    def validate_identifier_field(field_id: int, id_to_field: Dict[int, 
NestedField], id_to_parent: Dict[int, int]) -> None:
+        """Validates that the field with the given ID is a valid identifier 
field.
+
+        Args:
+          field_id: The ID of the field to validate.
+          id_to_field: A map from field IDs to field objects.
+          id_to_parent: A map from field IDs to their parent field IDs.
+
+        Raises:
+          ValueError: If the field is not valid.
+        """
+        field = id_to_field.get(field_id)
+        if field is None:
+            raise ValueError(f"Cannot add fieldId {field_id} as an identifier 
field: field does not exist")
+
+        if not field.field_type.is_primitive:
+            raise ValueError(f"Cannot add field {field.name} as an identifier 
field: not a primitive type field")

Review Comment:
   ```suggestion
               raise ValueError(f"Identifier field {field_id} invalid: not a 
primitive type field")
   ```



##########
python/pyiceberg/schema.py:
##########
@@ -88,6 +88,10 @@ def __init__(self, *fields: NestedField, **data: Any):
             data["fields"] = fields
         super().__init__(**data)
         self._name_to_id = index_by_name(self)
+        if self.identifier_field_ids is not None:

Review Comment:
   That looks great! Having this in `schema.py` is the right place



##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
         """Returns the IDs of the current schema."""
         return set(self._name_to_id.values())
 
+    @staticmethod
+    def validate_identifier_field(field_id: int, id_to_field: Dict[int, 
NestedField], id_to_parent: Dict[int, int]) -> None:
+        """Validates that the field with the given ID is a valid identifier 
field.
+
+        Args:
+          field_id: The ID of the field to validate.
+          id_to_field: A map from field IDs to field objects.
+          id_to_parent: A map from field IDs to their parent field IDs.
+
+        Raises:
+          ValueError: If the field is not valid.
+        """
+        field = id_to_field.get(field_id)
+        if field is None:
+            raise ValueError(f"Cannot add fieldId {field_id} as an identifier 
field: field does not exist")
+
+        if not field.field_type.is_primitive:
+            raise ValueError(f"Cannot add field {field.name} as an identifier 
field: not a primitive type field")
+
+        if not field.required:
+            raise ValueError(f"Cannot add field {field.name} as an identifier 
field: not a required field")
+
+        if isinstance(field.field_type, (DoubleType, FloatType)):
+            raise ValueError(f"Cannot add field {field.name} as an identifier 
field: must not be float or double field")

Review Comment:
   ```suggestion
               raise ValueError(f"Identifier field {field_id} invalid: must not 
be float or double field")
   ```



##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
         """Returns the IDs of the current schema."""
         return set(self._name_to_id.values())
 
+    @staticmethod
+    def validate_identifier_field(field_id: int, id_to_field: Dict[int, 
NestedField], id_to_parent: Dict[int, int]) -> None:
+        """Validates that the field with the given ID is a valid identifier 
field.
+
+        Args:
+          field_id: The ID of the field to validate.
+          id_to_field: A map from field IDs to field objects.
+          id_to_parent: A map from field IDs to their parent field IDs.
+
+        Raises:
+          ValueError: If the field is not valid.
+        """
+        field = id_to_field.get(field_id)
+        if field is None:
+            raise ValueError(f"Cannot add fieldId {field_id} as an identifier 
field: field does not exist")
+
+        if not field.field_type.is_primitive:
+            raise ValueError(f"Cannot add field {field.name} as an identifier 
field: not a primitive type field")
+
+        if not field.required:
+            raise ValueError(f"Cannot add field {field.name} as an identifier 
field: not a required field")

Review Comment:
   ```suggestion
               raise ValueError(f"Identifier field {field_id} invalid: not a 
required field")
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to