HonahX commented on code in PR #288: URL: https://github.com/apache/iceberg-python/pull/288#discussion_r1461512416
########## pyiceberg/catalog/glue.py: ########## @@ -84,19 +110,105 @@ def _construct_parameters( return new_parameters +def _type_to_glue_type_string(input_type: IcebergType) -> str: + if isinstance(input_type, BooleanType): + return "boolean" + if isinstance(input_type, IntegerType): + return "int" + if isinstance(input_type, LongType): + return "bigint" + if isinstance(input_type, FloatType): + return "float" + if isinstance(input_type, DoubleType): + return "double" + if isinstance(input_type, DateType): + return "date" + if isinstance( + input_type, + ( + TimeType, + StringType, + UUIDType, + ), + ): + return "string" + if isinstance(input_type, TimestampType): + return "timestamp" + if isinstance( + input_type, + ( + FixedType, + BinaryType, + ), + ): + return "binary" + if isinstance(input_type, DecimalType): + return f"decimal({input_type.precision},{input_type.scale})" + if isinstance(input_type, StructType): + name_to_type = ",".join(f"{f.name}:{_type_to_glue_type_string(f.field_type)}" for f in input_type.fields) + return f"struct<{name_to_type}>" + if isinstance(input_type, ListType): + return f"array<{_type_to_glue_type_string(input_type.element_type)}>" + if isinstance(input_type, MapType): + return f"map<{_type_to_glue_type_string(input_type.key_type)},{_type_to_glue_type_string(input_type.value_type)}>" + + raise ValueError(f"Unknown Type {input_type}") + + +def _to_columns(metadata: TableMetadataCommonFields) -> List[ColumnTypeDef]: + results: Dict[str, ColumnTypeDef] = {} + + def _append_to_results(field: NestedField, is_current: bool) -> None: + if field.name in results: + return + + results[field.name] = cast( + ColumnTypeDef, + { + "Name": field.name, + "Type": _type_to_glue_type_string(field.field_type), + "Parameters": { + ICEBERG_FIELD_ID: str(field.field_id), + ICEBERG_FIELD_OPTIONAL: str(field.optional).lower(), + ICEBERG_FIELD_CURRENT: str(is_current).lower(), + }, + }, + ) + if field.doc: + results[field.name]["Comment"] = field.doc + + if current_schema := metadata.schema_by_id(metadata.current_schema_id): + for field in current_schema.columns: + _append_to_results(field, True) + + for schema in metadata.schemas: + if schema.schema_id == metadata.current_schema_id: + continue + for field in schema.columns: + _append_to_results(field, False) Review Comment: Ah, I missed that part. Thanks for the explanation! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org