sungwy commented on code in PR #2176: URL: https://github.com/apache/iceberg-python/pull/2176#discussion_r2212019384
########## pyiceberg/partitioning.py: ########## @@ -272,6 +272,60 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre T = TypeVar("T") +class PartitionMap(Generic[T]): + _specs: dict[int, PartitionSpec] + _partition_maps: dict[int, dict[Record | None, T]] + + def __init__(self, specs: dict[int, PartitionSpec]): + self._specs = specs + self._partition_maps = {} + + def __len__(self) -> int: + """Return the length of the partition map. + + Returns: + length of _partition_maps + """ + return len(self.values()) + + def is_empty(self) -> bool: + return len(self._partition_maps.values()) == 0 + + def contains_key(self, spec_id: int, struct: Record) -> bool: + return self._partition_maps.get(spec_id) is not None + + def contains_value(self, value: T) -> bool: + return value in self._partition_maps.values() Review Comment: If we were to follow the [Java reference implementation](https://github.com/apache/iceberg/blob/026374b530eeea46429cef61a99ba9321e0587b6/core/src/main/java/org/apache/iceberg/util/PartitionMap.java#L89), we'd want to check if the `value` exists in all the contained partition map values. (`self._partition_maps` is a dict of a dict) ```suggestion return any(value in partition_map.values() for partition_map in self._partition_maps.values()) ``` ########## pyiceberg/partitioning.py: ########## @@ -272,6 +272,60 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre T = TypeVar("T") +class PartitionMap(Generic[T]): + _specs: dict[int, PartitionSpec] + _partition_maps: dict[int, dict[Record | None, T]] + + def __init__(self, specs: dict[int, PartitionSpec]): + self._specs = specs + self._partition_maps = {} + + def __len__(self) -> int: + """Return the length of the partition map. + + Returns: + length of _partition_maps + """ + return len(self.values()) + + def is_empty(self) -> bool: + return len(self._partition_maps.values()) == 0 + + def contains_key(self, spec_id: int, struct: Record) -> bool: + return self._partition_maps.get(spec_id) is not None Review Comment: Should we check if the struct key is present? ```suggestion def contains_key(self, spec_id: int, struct: Record) -> bool: return struct in self._partition_maps.get(spec_id, {}) ``` ########## pyiceberg/partitioning.py: ########## @@ -272,6 +272,60 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre T = TypeVar("T") +class PartitionMap(Generic[T]): + _specs: dict[int, PartitionSpec] + _partition_maps: dict[int, dict[Record | None, T]] + + def __init__(self, specs: dict[int, PartitionSpec]): + self._specs = specs + self._partition_maps = {} + + def __len__(self) -> int: + """Return the length of the partition map. + + Returns: + length of _partition_maps + """ + return len(self.values()) + + def is_empty(self) -> bool: + return len(self._partition_maps.values()) == 0 + + def contains_key(self, spec_id: int, struct: Record) -> bool: + return self._partition_maps.get(spec_id) is not None + + def contains_value(self, value: T) -> bool: + return value in self._partition_maps.values() + + def get(self, spec_id: int, struct: Record | None) -> Optional[T]: + if partition_map := self._partition_maps.get(spec_id): + if result := partition_map.get(struct): + return result + return None + + def put(self, spec_id: int, struct: Record | None, value: T) -> None: + if _ := self._specs.get(spec_id): + if spec_id not in self._partition_maps: + self._partition_maps[spec_id] = {struct: value} + else: + self._partition_maps[spec_id][struct] = value + + def compute_if_absent(self, spec_id: int, struct: Record, value: T, value_factory: Callable[[], T]) -> T: Review Comment: Are we using the `value` argument here? ```suggestion def compute_if_absent(self, spec_id: int, struct: Record, value_factory: Callable[[], T]) -> T: ``` ########## pyiceberg/partitioning.py: ########## @@ -272,6 +272,60 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre T = TypeVar("T") +class PartitionMap(Generic[T]): + _specs: dict[int, PartitionSpec] + _partition_maps: dict[int, dict[Record | None, T]] + + def __init__(self, specs: dict[int, PartitionSpec]): + self._specs = specs + self._partition_maps = {} + + def __len__(self) -> int: + """Return the length of the partition map. + + Returns: + length of _partition_maps + """ + return len(self.values()) + + def is_empty(self) -> bool: + return len(self._partition_maps.values()) == 0 Review Comment: If we are following the [Java implementation](https://github.com/apache/iceberg/blob/026374b530eeea46429cef61a99ba9321e0587b6/core/src/main/java/org/apache/iceberg/util/PartitionMap.java#L74), I think we'd want to check that the list of values of each partition map is empty, instead of the current behavior, which checks that the length of `self._partition_maps` is 0. ```suggestion return all(len(partition_map) == 0 for partition_map in self._partition_maps.values()) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org