HonahX commented on code in PR #183: URL: https://github.com/apache/iceberg-python/pull/183#discussion_r1419912292
########## pyiceberg/io/pyarrow.py: ########## @@ -713,28 +721,49 @@ def primitive(self, primitive: pa.DataType) -> Optional[T]: """Visit a primitive type.""" -def _get_field_id(field: pa.Field) -> Optional[int]: - for pyarrow_field_id_key in PYARROW_FIELD_ID_KEYS: - if field_id_str := field.metadata.get(pyarrow_field_id_key): - return int(field_id_str.decode()) - return None +class _ConvertToIceberg(PyArrowSchemaVisitor[Union[IcebergType, Schema]]): + counter: itertools.count[int] + missing_id_metadata: Optional[bool] + def __init__(self) -> None: + self.counter = itertools.count(1) + self.missing_id_metadata = None -def _get_field_doc(field: pa.Field) -> Optional[str]: - for pyarrow_doc_key in PYARROW_FIELD_DOC_KEYS: - if doc_str := field.metadata.get(pyarrow_doc_key): - return doc_str.decode() - return None + def _get_field_id(self, field: pa.Field) -> int: + field_id: Optional[int] = None + for pyarrow_field_id_key in PYARROW_FIELD_ID_KEYS: + if field.metadata and (field_id_str := field.metadata.get(pyarrow_field_id_key)): + field_id = int(field_id_str.decode()) + + if field_id is None: + if self.missing_id_metadata is None: + warnings.warn("Field-ids are missing, new IDs will be set") Review Comment: Shall we add more details to this warning? For example: ``` Warning: Missing field-IDs will be auto-assigned, possibly leading to inconsistencies between the file schema and the schema stored in table metadata. ``` I suggest this because fields in the generated `file_schema` may end up with different `field_ids` from those in the `table_schema`, and that mismatch may cause other errors later. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org