rambleraptor commented on code in PR #2435: URL: https://github.com/apache/iceberg-python/pull/2435#discussion_r2403581285
########## pyiceberg/catalog/rest/planning_models.py: ########## @@ -0,0 +1,347 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import date +from typing import List, Literal, Optional, Union +from uuid import UUID + +from pydantic import Field + +from pyiceberg.catalog.rest.expression import Expression +from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel + + +class FieldName(IcebergRootModel[str]): + root: str = Field( + ..., + description="A full field name (including parent field names), such as those passed in APIs like Java `Schema#findField(String name)`.\nThe nested field name follows these rules - Nested struct fields are named by concatenating field names at each struct level using dot (`.`) delimiter, e.g. employer.contact_info.address.zip_code - Nested fields in a map key are named using the keyword `key`, e.g. employee_address_map.key.first_name - Nested fields in a map value are named using the keyword `value`, e.g. employee_address_map.value.zip_code - Nested fields in a list are named using the keyword `element`, e.g. employees.element.first_name", + ) + + +class BooleanTypeValue(IcebergRootModel[bool]): + root: bool = Field(..., json_schema_extra={"example": True}) + + +class IntegerTypeValue(IcebergRootModel[int]): + root: int = Field(..., json_schema_extra={"example": 42}) + + +class LongTypeValue(IcebergRootModel[int]): + root: int = Field(..., json_schema_extra={"example": 9223372036854775807}) + + +class FloatTypeValue(IcebergRootModel[float]): + root: float = Field(..., json_schema_extra={"example": 3.14}) + + +class DoubleTypeValue(IcebergRootModel[float]): + root: float = Field(..., json_schema_extra={"example": 123.456}) + + +class DecimalTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Decimal type values are serialized as strings. Decimals with a positive scale serialize as numeric plain text, while decimals with a negative scale use scientific notation and the exponent will be equal to the negated scale. For instance, a decimal with a positive scale is '123.4500', with zero scale is '2', and with a negative scale is '2E+20'", + json_schema_extra={"example": "123.4500"}, + ) + + +class StringTypeValue(IcebergRootModel[str]): + root: str = Field(..., json_schema_extra={"example": "hello"}) + + +class UUIDTypeValue(IcebergRootModel[UUID]): + root: UUID = Field( + ..., + description="UUID type values are serialized as a 36-character lowercase string in standard UUID format as specified by RFC-4122", + json_schema_extra={"example": "eb26bdb1-a1d8-4aa6-990e-da940875492c"}, + max_length=36, + min_length=36, + pattern="^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", + ) + + +class DateTypeValue(IcebergRootModel[date]): + root: date = Field( + ..., + description="Date type values follow the 'YYYY-MM-DD' ISO-8601 standard date format", + json_schema_extra={"example": "2007-12-03"}, + ) + + +class TimeTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Time type values follow the 'HH:MM:SS.ssssss' ISO-8601 format with microsecond precision", + json_schema_extra={"example": "22:31:08.123456"}, + ) + + +class TimestampTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Timestamp type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss' ISO-8601 format with microsecond precision", + json_schema_extra={"example": "2007-12-03T10:15:30.123456"}, + ) + + +class TimestampTzTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="TimestampTz type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss+00:00' ISO-8601 format with microsecond precision, and a timezone offset (+00:00 for UTC)", + json_schema_extra={"example": "2007-12-03T10:15:30.123456+00:00"}, + ) + + +class TimestampNanoTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss' ISO-8601 format with nanosecond precision", + json_schema_extra={"example": "2007-12-03T10:15:30.123456789"}, + ) + + +class TimestampTzNanoTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss+00:00' ISO-8601 format with nanosecond precision, and a timezone offset (+00:00 for UTC)", + json_schema_extra={"example": "2007-12-03T10:15:30.123456789+00:00"}, + ) + + +class FixedTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Fixed length type values are stored and serialized as an uppercase hexadecimal string preserving the fixed length", + json_schema_extra={"example": "78797A"}, + ) + + +class BinaryTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Binary type values are stored and serialized as an uppercase hexadecimal string", + json_schema_extra={"example": "78797A"}, + ) + + +class CountMap(IcebergBaseModel): + keys: Optional[List[IntegerTypeValue]] = Field(None, description="List of integer column ids for each corresponding value") + values: Optional[List[LongTypeValue]] = Field(None, description="List of Long values, matched to 'keys' by index") + + +class PrimitiveTypeValue( + IcebergRootModel[ + Union[ + BooleanTypeValue, + IntegerTypeValue, + LongTypeValue, + FloatTypeValue, + DoubleTypeValue, + DecimalTypeValue, + StringTypeValue, + UUIDTypeValue, + DateTypeValue, + TimeTypeValue, + TimestampTypeValue, + TimestampTzTypeValue, + TimestampNanoTypeValue, + TimestampTzNanoTypeValue, + FixedTypeValue, + BinaryTypeValue, + ] + ] +): + root: Union[ + BooleanTypeValue, + IntegerTypeValue, + LongTypeValue, + FloatTypeValue, + DoubleTypeValue, + DecimalTypeValue, + StringTypeValue, + UUIDTypeValue, + DateTypeValue, + TimeTypeValue, + TimestampTypeValue, + TimestampTzTypeValue, + TimestampNanoTypeValue, + TimestampTzNanoTypeValue, + FixedTypeValue, + BinaryTypeValue, + ] + + +class ValueMap(IcebergBaseModel): + keys: Optional[List[IntegerTypeValue]] = Field(None, description="List of integer column ids for each corresponding value") + values: Optional[List[PrimitiveTypeValue]] = Field( + None, description="List of primitive type values, matched to 'keys' by index" + ) + + +class PlanTableScanRequest(IcebergBaseModel): Review Comment: Yep, great idea. Added! ########## pyiceberg/catalog/rest/planning_models.py: ########## @@ -0,0 +1,347 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from datetime import date +from typing import List, Literal, Optional, Union +from uuid import UUID + +from pydantic import Field + +from pyiceberg.catalog.rest.expression import Expression +from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel + + +class FieldName(IcebergRootModel[str]): + root: str = Field( + ..., + description="A full field name (including parent field names), such as those passed in APIs like Java `Schema#findField(String name)`.\nThe nested field name follows these rules - Nested struct fields are named by concatenating field names at each struct level using dot (`.`) delimiter, e.g. employer.contact_info.address.zip_code - Nested fields in a map key are named using the keyword `key`, e.g. employee_address_map.key.first_name - Nested fields in a map value are named using the keyword `value`, e.g. employee_address_map.value.zip_code - Nested fields in a list are named using the keyword `element`, e.g. employees.element.first_name", + ) + + +class BooleanTypeValue(IcebergRootModel[bool]): + root: bool = Field(..., json_schema_extra={"example": True}) + + +class IntegerTypeValue(IcebergRootModel[int]): + root: int = Field(..., json_schema_extra={"example": 42}) + + +class LongTypeValue(IcebergRootModel[int]): + root: int = Field(..., json_schema_extra={"example": 9223372036854775807}) + + +class FloatTypeValue(IcebergRootModel[float]): + root: float = Field(..., json_schema_extra={"example": 3.14}) + + +class DoubleTypeValue(IcebergRootModel[float]): + root: float = Field(..., json_schema_extra={"example": 123.456}) + + +class DecimalTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Decimal type values are serialized as strings. Decimals with a positive scale serialize as numeric plain text, while decimals with a negative scale use scientific notation and the exponent will be equal to the negated scale. For instance, a decimal with a positive scale is '123.4500', with zero scale is '2', and with a negative scale is '2E+20'", + json_schema_extra={"example": "123.4500"}, + ) + + +class StringTypeValue(IcebergRootModel[str]): + root: str = Field(..., json_schema_extra={"example": "hello"}) + + +class UUIDTypeValue(IcebergRootModel[UUID]): + root: UUID = Field( + ..., + description="UUID type values are serialized as a 36-character lowercase string in standard UUID format as specified by RFC-4122", + json_schema_extra={"example": "eb26bdb1-a1d8-4aa6-990e-da940875492c"}, + max_length=36, + min_length=36, + pattern="^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", + ) + + +class DateTypeValue(IcebergRootModel[date]): + root: date = Field( + ..., + description="Date type values follow the 'YYYY-MM-DD' ISO-8601 standard date format", + json_schema_extra={"example": "2007-12-03"}, + ) + + +class TimeTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Time type values follow the 'HH:MM:SS.ssssss' ISO-8601 format with microsecond precision", + json_schema_extra={"example": "22:31:08.123456"}, + ) + + +class TimestampTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Timestamp type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss' ISO-8601 format with microsecond precision", + json_schema_extra={"example": "2007-12-03T10:15:30.123456"}, + ) + + +class TimestampTzTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="TimestampTz type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss+00:00' ISO-8601 format with microsecond precision, and a timezone offset (+00:00 for UTC)", + json_schema_extra={"example": "2007-12-03T10:15:30.123456+00:00"}, + ) + + +class TimestampNanoTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss' ISO-8601 format with nanosecond precision", + json_schema_extra={"example": "2007-12-03T10:15:30.123456789"}, + ) + + +class TimestampTzNanoTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss+00:00' ISO-8601 format with nanosecond precision, and a timezone offset (+00:00 for UTC)", + json_schema_extra={"example": "2007-12-03T10:15:30.123456789+00:00"}, + ) + + +class FixedTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Fixed length type values are stored and serialized as an uppercase hexadecimal string preserving the fixed length", + json_schema_extra={"example": "78797A"}, + ) + + +class BinaryTypeValue(IcebergRootModel[str]): + root: str = Field( + ..., + description="Binary type values are stored and serialized as an uppercase hexadecimal string", + json_schema_extra={"example": "78797A"}, + ) + + +class CountMap(IcebergBaseModel): + keys: Optional[List[IntegerTypeValue]] = Field(None, description="List of integer column ids for each corresponding value") + values: Optional[List[LongTypeValue]] = Field(None, description="List of Long values, matched to 'keys' by index") + + +class PrimitiveTypeValue( + IcebergRootModel[ + Union[ + BooleanTypeValue, + IntegerTypeValue, + LongTypeValue, + FloatTypeValue, + DoubleTypeValue, + DecimalTypeValue, + StringTypeValue, + UUIDTypeValue, + DateTypeValue, + TimeTypeValue, + TimestampTypeValue, + TimestampTzTypeValue, + TimestampNanoTypeValue, + TimestampTzNanoTypeValue, + FixedTypeValue, + BinaryTypeValue, + ] + ] +): + root: Union[ + BooleanTypeValue, + IntegerTypeValue, + LongTypeValue, + FloatTypeValue, + DoubleTypeValue, + DecimalTypeValue, + StringTypeValue, + UUIDTypeValue, + DateTypeValue, + TimeTypeValue, + TimestampTypeValue, + TimestampTzTypeValue, + TimestampNanoTypeValue, + TimestampTzNanoTypeValue, + FixedTypeValue, + BinaryTypeValue, + ] + + +class ValueMap(IcebergBaseModel): + keys: Optional[List[IntegerTypeValue]] = Field(None, description="List of integer column ids for each corresponding value") + values: Optional[List[PrimitiveTypeValue]] = Field( + None, description="List of primitive type values, matched to 'keys' by index" + ) + + +class PlanTableScanRequest(IcebergBaseModel): Review Comment: Yep, great idea. Added that model with tests! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
