Re: [PR] Set Glue Table Information when creating/updating tables [iceberg-python]

via GitHub Mon, 22 Jan 2024 00:09:36 -0800


mgmarino commented on code in PR #288:
URL: https://github.com/apache/iceberg-python/pull/288#discussion_r1461475758



##########
pyiceberg/catalog/glue.py:
##########
@@ -84,19 +110,105 @@ def _construct_parameters(
     return new_parameters
 
 
+def _type_to_glue_type_string(input_type: IcebergType) -> str:
+    if isinstance(input_type, BooleanType):
+        return "boolean"
+    if isinstance(input_type, IntegerType):
+        return "int"
+    if isinstance(input_type, LongType):
+        return "bigint"
+    if isinstance(input_type, FloatType):
+        return "float"
+    if isinstance(input_type, DoubleType):
+        return "double"
+    if isinstance(input_type, DateType):
+        return "date"
+    if isinstance(
+        input_type,
+        (
+            TimeType,
+            StringType,
+            UUIDType,
+        ),
+    ):
+        return "string"
+    if isinstance(input_type, TimestampType):
+        return "timestamp"
+    if isinstance(
+        input_type,
+        (
+            FixedType,
+            BinaryType,
+        ),
+    ):
+        return "binary"
+    if isinstance(input_type, DecimalType):
+        return f"decimal({input_type.precision},{input_type.scale})"
+    if isinstance(input_type, StructType):
+        name_to_type = ",".join(f"{f.name}:{_type_to_glue_type_string(f.field_type)}" for f in input_type.fields)
+        return f"struct<{name_to_type}>"
+    if isinstance(input_type, ListType):
+        return f"array<{_type_to_glue_type_string(input_type.element_type)}>"
+    if isinstance(input_type, MapType):
+        return f"map<{_type_to_glue_type_string(input_type.key_type)},{_type_to_glue_type_string(input_type.value_type)}>"
+
+    raise ValueError(f"Unknown Type {input_type}")
+
+
+def _to_columns(metadata: TableMetadataCommonFields) -> List[ColumnTypeDef]:
+    results: Dict[str, ColumnTypeDef] = {}
+
+    def _append_to_results(field: NestedField, is_current: bool) -> None:
+        if field.name in results:
+            return
+
+        results[field.name] = cast(
+            ColumnTypeDef,
+            {
+                "Name": field.name,
+                "Type": _type_to_glue_type_string(field.field_type),
+                "Parameters": {
+                    ICEBERG_FIELD_ID: str(field.field_id),
+                    ICEBERG_FIELD_OPTIONAL: str(field.optional).lower(),
+                    ICEBERG_FIELD_CURRENT: str(is_current).lower(),
+                },
+            },
+        )
+        if field.doc:
+            results[field.name]["Comment"] = field.doc
+
+    if current_schema := metadata.schema_by_id(metadata.current_schema_id):
+        for field in current_schema.columns:
+            _append_to_results(field, True)
+
+    for schema in metadata.schemas:
+        if schema.schema_id == metadata.current_schema_id:
+            continue
+        for field in schema.columns:
+            _append_to_results(field, False)

Review Comment:
   This isn't entirely equivalent. That is, if the current schema is not the
first schema, the column will end up not being marked as current.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Re: [PR] Set Glue Table Information when creating/updating tables [iceberg-python]

Reply via email to