rdblue commented on code in PR #6139: URL: https://github.com/apache/iceberg/pull/6139#discussion_r1024482375
########## python/pyiceberg/expressions/__init__.py: ########## @@ -281,249 +361,313 @@ def __invert__(self) -> BoundIsNull: return BoundIsNull(self.term) -@dataclass(frozen=True) -class IsNull(UnaryPredicate[T]): - as_bound = BoundIsNull - - def __invert__(self) -> NotNull[T]: +class IsNull(UnaryPredicate): + def __invert__(self) -> NotNull: return NotNull(self.term) + @property + def as_bound(self) -> Type[BoundIsNull[L]]: + return BoundIsNull[L] -@dataclass(frozen=True) -class NotNull(UnaryPredicate[T]): - as_bound = BoundNotNull - def __invert__(self) -> IsNull[T]: +class NotNull(UnaryPredicate): + def __invert__(self) -> IsNull: return IsNull(self.term) + @property + def as_bound(self) -> Type[BoundNotNull[L]]: + return BoundNotNull[L] -@dataclass(frozen=True) -class BoundIsNaN(BoundUnaryPredicate[T]): - def __new__(cls, term: BoundTerm[T]): # pylint: disable=W0221 + +class BoundIsNaN(BoundUnaryPredicate[L]): + def __new__(cls, term: BoundTerm[L]): # pylint: disable=W0221 bound_type = term.ref().field.field_type if type(bound_type) in {FloatType, DoubleType}: return super().__new__(cls) return AlwaysFalse() - def __invert__(self) -> BoundNotNaN[T]: + def __invert__(self) -> BoundNotNaN[L]: return BoundNotNaN(self.term) -@dataclass(frozen=True) -class BoundNotNaN(BoundUnaryPredicate[T]): - def __new__(cls, term: BoundTerm[T]): # pylint: disable=W0221 +class BoundNotNaN(BoundUnaryPredicate[L]): + def __new__(cls, term: BoundTerm[L]): # pylint: disable=W0221 bound_type = term.ref().field.field_type if type(bound_type) in {FloatType, DoubleType}: return super().__new__(cls) return AlwaysTrue() - def __invert__(self) -> BoundIsNaN[T]: + def __invert__(self) -> BoundIsNaN[L]: return BoundIsNaN(self.term) -@dataclass(frozen=True) -class IsNaN(UnaryPredicate[T]): - as_bound = BoundIsNaN - - def __invert__(self) -> NotNaN[T]: +class IsNaN(UnaryPredicate): + def __invert__(self) -> NotNaN: return NotNaN(self.term) + @property + def as_bound(self) -> Type[BoundIsNaN[L]]: + return BoundIsNaN[L] -@dataclass(frozen=True) -class NotNaN(UnaryPredicate[T]): - as_bound = BoundNotNaN - def __invert__(self) -> IsNaN[T]: +class NotNaN(UnaryPredicate): + def __invert__(self) -> IsNaN: return IsNaN(self.term) + @property + def as_bound(self) -> Type[BoundNotNaN[L]]: + return BoundNotNaN[L] -@dataclass(frozen=True) -class SetPredicate(UnboundPredicate[T]): - literals: tuple[Literal[T], ...] - def __invert__(self) -> SetPredicate[T]: - """Inverted expression of the SetPredicate""" - raise NotImplementedError +class SetPredicate(Generic[L], UnboundPredicate, ABC): + literals: Set[Literal[L]] - def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression: + def __init__(self, term: Union[str, UnboundTerm], literals: Union[Iterable[L], Iterable[Literal[L]]]): + super().__init__(term) + self.literals = _to_literal_set(literals) + + def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundSetPredicate[L]: bound_term = self.term.bind(schema, case_sensitive) return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals}) + def __str__(self): + # Sort to make it deterministic + return f"{str(self.__class__.__name__)}({str(self.term)}, {{{', '.join(sorted([str(literal) for literal in self.literals]))}}})" + + def __repr__(self) -> str: + # Sort to make it deterministic + return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{', '.join(sorted([repr(literal) for literal in self.literals]))}}})" + + def __eq__(self, other: Any) -> bool: + return self.term == other.term and self.literals == other.literals if isinstance(other, SetPredicate) else False + + @property + @abstractmethod + def as_bound(self) -> Type[BoundSetPredicate[L]]: + return BoundSetPredicate[L] + + +class BoundSetPredicate(BoundPredicate[L], ABC): + literals: Set[Literal[L]] -@dataclass(frozen=True) -class BoundSetPredicate(BoundPredicate[T]): - literals: set[Literal[T]] + def __init__(self, term: BoundTerm[L], literals: Union[Iterable[L], Iterable[Literal[L]]]): + # Since we don't know the type of BoundPredicate[L], we have to ignore this one + super().__init__(term) # type: ignore + self.literals = _to_literal_set(literals) # pylint: disable=W0621 - def __invert__(self) -> BoundSetPredicate[T]: - """Inverted expression of the SetPredicate""" - raise NotImplementedError + def __str__(self): + # Sort to make it deterministic + return f"{str(self.__class__.__name__)}({str(self.term)}, {{{', '.join(sorted([str(literal) for literal in self.literals]))}}})" + def __repr__(self) -> str: + # Sort to make it deterministic + return f"{str(self.__class__.__name__)}({repr(self.term)}, {{{', '.join(sorted([repr(literal) for literal in self.literals]))}}})" -@dataclass(frozen=True) -class BoundIn(BoundSetPredicate[T]): - def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]): # pylint: disable=W0221 - count = len(literals) + def __eq__(self, other: Any) -> bool: + return self.term == other.term and self.literals == other.literals if isinstance(other, BoundSetPredicate) else False + + +class BoundIn(BoundSetPredicate[L]): + def __new__(cls, term: BoundTerm[L], literals: Union[Iterable, Iterable[Literal[L]]]): # pylint: disable=W0221 Review Comment: I think that `literals` should just be `Iterable[Literal[L]]` and not the union -- we don't need to support passing non-literal values here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org