Fokko commented on code in PR #8393:
URL: https://github.com/apache/iceberg/pull/8393#discussion_r1306487300
##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
"""Returns the IDs of the current schema."""
return set(self._name_to_id.values())
+ @staticmethod
+ def validate_identifier_field(field_id: int, id_to_field: Dict[int,
NestedField], id_to_parent: Dict[int, int]) -> None:
+ """Validates that the field with the given ID is a valid identifier
field.
+
+ Args:
+ field_id: The ID of the field to validate.
+ id_to_field: A map from field IDs to field objects.
+ id_to_parent: A map from field IDs to their parent field IDs.
+
+ Raises:
+ ValueError: If the field is not valid.
+ """
+ field = id_to_field.get(field_id)
+ if field is None:
+ raise ValueError(f"Cannot add fieldId {field_id} as an identifier
field: field does not exist")
Review Comment:
```suggestion
raise ValueError(f"Identifier field {field_id} invalid: does not
exist")
```
##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
"""Returns the IDs of the current schema."""
return set(self._name_to_id.values())
+ @staticmethod
+ def validate_identifier_field(field_id: int, id_to_field: Dict[int,
NestedField], id_to_parent: Dict[int, int]) -> None:
Review Comment:
Why not make this one non-static and private:
```suggestion
def _validate_identifier_field(self, field_id: int) -> None:
```
This way we can just call `self.find_field`
##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
"""Returns the IDs of the current schema."""
return set(self._name_to_id.values())
+ @staticmethod
+ def validate_identifier_field(field_id: int, id_to_field: Dict[int,
NestedField], id_to_parent: Dict[int, int]) -> None:
+ """Validates that the field with the given ID is a valid identifier
field.
+
+ Args:
+ field_id: The ID of the field to validate.
+ id_to_field: A map from field IDs to field objects.
+ id_to_parent: A map from field IDs to their parent field IDs.
+
+ Raises:
+ ValueError: If the field is not valid.
+ """
+ field = id_to_field.get(field_id)
+ if field is None:
+ raise ValueError(f"Cannot add fieldId {field_id} as an identifier
field: field does not exist")
+
+ if not field.field_type.is_primitive:
+ raise ValueError(f"Cannot add field {field.name} as an identifier
field: not a primitive type field")
Review Comment:
```suggestion
raise ValueError(f"Identifier field {field_id} invalid: not a
primitive type field")
```
##########
python/pyiceberg/schema.py:
##########
@@ -88,6 +88,10 @@ def __init__(self, *fields: NestedField, **data: Any):
data["fields"] = fields
super().__init__(**data)
self._name_to_id = index_by_name(self)
+ if self.identifier_field_ids is not None:
Review Comment:
That looks great! Having this in `schema.py` is the right place
##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
"""Returns the IDs of the current schema."""
return set(self._name_to_id.values())
+ @staticmethod
+ def validate_identifier_field(field_id: int, id_to_field: Dict[int,
NestedField], id_to_parent: Dict[int, int]) -> None:
+ """Validates that the field with the given ID is a valid identifier
field.
+
+ Args:
+ field_id: The ID of the field to validate.
+ id_to_field: A map from field IDs to field objects.
+ id_to_parent: A map from field IDs to their parent field IDs.
+
+ Raises:
+ ValueError: If the field is not valid.
+ """
+ field = id_to_field.get(field_id)
+ if field is None:
+ raise ValueError(f"Cannot add fieldId {field_id} as an identifier
field: field does not exist")
+
+ if not field.field_type.is_primitive:
+ raise ValueError(f"Cannot add field {field.name} as an identifier
field: not a primitive type field")
+
+ if not field.required:
+ raise ValueError(f"Cannot add field {field.name} as an identifier
field: not a required field")
+
+ if isinstance(field.field_type, (DoubleType, FloatType)):
+ raise ValueError(f"Cannot add field {field.name} as an identifier
field: must not be float or double field")
Review Comment:
```suggestion
raise ValueError(f"Identifier field {field_id} invalid: must not
be float or double field")
```
##########
python/pyiceberg/schema.py:
##########
@@ -273,6 +282,49 @@ def field_ids(self) -> Set[int]:
"""Returns the IDs of the current schema."""
return set(self._name_to_id.values())
+ @staticmethod
+ def validate_identifier_field(field_id: int, id_to_field: Dict[int,
NestedField], id_to_parent: Dict[int, int]) -> None:
+ """Validates that the field with the given ID is a valid identifier
field.
+
+ Args:
+ field_id: The ID of the field to validate.
+ id_to_field: A map from field IDs to field objects.
+ id_to_parent: A map from field IDs to their parent field IDs.
+
+ Raises:
+ ValueError: If the field is not valid.
+ """
+ field = id_to_field.get(field_id)
+ if field is None:
+ raise ValueError(f"Cannot add fieldId {field_id} as an identifier
field: field does not exist")
+
+ if not field.field_type.is_primitive:
+ raise ValueError(f"Cannot add field {field.name} as an identifier
field: not a primitive type field")
+
+ if not field.required:
+ raise ValueError(f"Cannot add field {field.name} as an identifier
field: not a required field")
Review Comment:
```suggestion
raise ValueError(f"Identifier field {field_id} invalid: not a
required field")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]