potatochipcoconut commented on issue #1934:
URL: 
https://github.com/apache/iceberg-python/issues/1934#issuecomment-2816103494

   Came up with a naive attempt, open to feedback. Not sure how it would handle 
e.g. int vs long, float vs double, etc.
   ```
   import builtins
   import datetime
   import uuid
   
   from pydantic import BaseModel
   from pydantic import Field
   from pydantic_core import PydanticUndefined
   from pyiceberg.schema import Schema as IcebergSchema
   from pyiceberg.types import (
       BooleanType,
       DateType,
       FloatType,
       IntegerType,
       MapType,
       NestedField,
       PrimitiveType,
       StringType,
       TimestampType,
       UUIDType,
   )
   from typing import (
       List,
       Literal,
   )
   
   
   class UnknownType(PrimitiveType):
       """Primitive type whose ``root`` value is pinned to ``'unknown'``.

       Temporary stand-in; remove after next public release.
       """

       # Only the literal 'unknown' is accepted, and it is also the default.
       root: Literal["unknown"] = Field("unknown")
   
   
   class Schema(BaseModel):
       @classmethod
       def model_pyiceberg_schema(cls):
           """
           Generate a PyIceberg Schema for a model class.
   
           Returns:
               The pyiceberg Schema compatible with Apache Iceberg Tables.
           """
           pyiceberg_fields: List[NestedField] = []
   
           for index, (name, field) in enumerate(cls.model_fields.items()):
               default = (
                   field.default if field.default != PydanticUndefined else None
               )
   
               match field.annotation:
                   case builtins.bool:
                       field_type = BooleanType()
                   case builtins.int:
                       field_type = IntegerType()
                   case datetime.date:
                       field_type = DateType()
                   case builtins.dict:
                       field_type = MapType()
                   case builtins.float:
                       field_type = FloatType()
                   case builtins.str:
                       field_type = StringType()
                   case datetime.datetime:
                       field_type = TimestampType()
                   case uuid.UUID:
                       field_type = UUIDType()
                   case _:
                       field_type = UnknownType()
   
               pyiceberg_fields.append(
                   NestedField(
                       field_id=index + 1,
                       field_type=field_type,
                       initial_default=default, # not working, unsure why.
                       name=name,
                       required=field.is_required(),
                   ),
               )
   
           return IcebergSchema(*pyiceberg_fields)
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to