Fokko commented on code in PR #6131:
URL: https://github.com/apache/iceberg/pull/6131#discussion_r1018441052


##########
python/pyiceberg/table/__init__.py:
##########
@@ -90,3 +103,90 @@ def snapshot_by_name(self, name: str) -> Optional[Snapshot]:
     def history(self) -> List[SnapshotLogEntry]:
         """Get the snapshot history of this table."""
         return self.metadata.snapshot_log
+
+
+class TableScan:
+    _always_true: ClassVar[BooleanExpression] = AlwaysTrue()
+    table: Table
+    row_filter: BooleanExpression
+    partition_filter: BooleanExpression
+    selected_fields: tuple[str]
+    case_sensitive: bool
+    snapshot_id: Optional[int]
+    options: Properties
+
+    def __init__(
+        self,
+        *,
+        table: Table,
+        row_filter: BooleanExpression = _always_true,
+        partition_filter: BooleanExpression = _always_true,
+        selected_fields: tuple[str] = ("*",),
+        case_sensitive: bool = True,
+        snapshot_id: Optional[int] = None,
+        options: Properties = EMPTY_DICT,
+    ):
+        self.table = table
+        self.row_filter = row_filter
+        self.partition_filter = partition_filter
+        self.selected_fields = selected_fields
+        self.case_sensitive = case_sensitive
+        self.snapshot_id = snapshot_id
+        self.options = options
+
+    def update(self, **overrides):
+        """Creates a copy of this table scan with updated fields."""
+        return TableScan(**{**self.__dict__, **overrides})
+
+    def snapshot(self):
+        if self.snapshot_id:
+            return self.table.snapshot_by_id(self.snapshot_id)
+
+        return self.table.current_snapshot()
+
+    def projection(self):
+        snapshot_schema = self.table.schemas().get(self.snapshot().schema_id) or self.table.schema()
+
+        if "*" in self.selected_fields:
+            return snapshot_schema
+
+        return snapshot_schema.select(*self.selected_fields, case_sensitive=self.case_sensitive)
+
+    def use_snapshot(self, snapshot_id: int):
+        if self.snapshot_id:
+            raise ValueError(f"Cannot override snapshot, already set snapshot id={self.snapshot_id}")
+        if self.table.snapshot_by_id(snapshot_id):
+            return self.update(snapshot_id=snapshot_id)
+
+        raise ValueError(f"Cannot scan unknown snapshot id={snapshot_id}")
+
+    def use_ref(self, name: str):
+        if self.snapshot_id:
+            raise ValueError(f"Cannot override ref, already set snapshot id={self.snapshot_id}")
+        if snapshot := self.table.snapshot_by_name(name):
+            return self.update(snapshot_id=snapshot.snapshot_id)
+
+        raise ValueError(f"Cannot scan unknown ref={name}")
+
+    def select(self, *field_names: str) -> "TableScan":
+        if "*" in self.selected_fields:
+            return self.update(selected_fields=field_names)
+        return self.update(selected_fields=tuple(set(self.selected_fields).intersection(field_names)))

Review Comment:
   Small bug:
   ```suggestion
           return self.update(selected_fields=tuple(set(self.selected_fields).intersection(set(field_names))))
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to