dev-goyal opened a new issue, #541: URL: https://github.com/apache/iceberg-python/issues/541
### Apache Iceberg version 0.6.0 (latest release) ### Please describe the bug 🐞 Given a table like so: `In [36]: table Out[36]: matches( ... 14: player_last_session: optional timestamptz, ... 30: subject_last_session: optional timestamptz, ), partition by: [run_date, player_agg_cluster_name, initiating_at], sort order: [], snapshot: Operation.APPEND: id=6595288807809068528, schema_id=0` I get the following error `In [25]: table.scan().to_arrow() --------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[25], line 1 ----> 1 table.scan().to_arrow() File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/table/__init__.py:1418, in DataScan.to_arrow(self) 1415 def to_arrow(self) -> pa.Table: 1416 from pyiceberg.io.pyarrow import project_table -> 1418 return project_table( 1419 self.plan_files(), 1420 self.table, 1421 self.row_filter, 1422 self.projection(), 1423 case_sensitive=self.case_sensitive, 1424 limit=self.limit, 1425 ) File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:1114, in project_table(tasks, table, row_filter, projected_schema, case_sensitive, limit) 1111 if limit is not None: 1112 _ = [f.cancel() for f in futures if not f.done()] -> 1114 tables = [f.result() for f in completed_futures if f.result()] 1116 if len(tables) < 1: 1117 return pa.Table.from_batches([], schema=schema_to_pyarrow(projected_schema)) File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:1114, in <listcomp>(.0) 1111 if limit is not None: 1112 _ = [f.cancel() for f in futures if not f.done()] -> 1114 tables = [f.result() for f in completed_futures if f.result()] 1116 if len(tables) < 1: 1117 return pa.Table.from_batches([], schema=schema_to_pyarrow(projected_schema)) File ~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/_base.py:449, in Future.result(self, timeout) 447 raise CancelledError() 448 elif 
self._state == FINISHED: --> 449 return self.__get_result() 451 self._condition.wait(timeout) 453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]: File ~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self) 399 if self._exception: 400 try: --> 401 raise self._exception 402 finally: 403 # Break a reference cycle with the exception in self._exception 404 self = None File ~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/thread.py:58, in _WorkItem.run(self) 55 return 57 try: ---> 58 result = self.fn(*self.args, **self.kwargs) 59 except BaseException as exc: 60 self.future.set_exception(exc) File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:957, in _task_to_table(fs, task, bound_row_filter, projected_schema, projected_field_ids, positional_deletes, case_sensitive, row_counts, limit, name_mapping) 954 if metadata := physical_schema.metadata: 955 schema_raw = metadata.get(ICEBERG_SCHEMA) 956 file_schema = ( --> 957 Schema.model_validate_json(schema_raw) if schema_raw is not None else pyarrow_to_schema(physical_schema, name_mapping) 958 ) 960 pyarrow_filter = None 961 if bound_row_filter is not AlwaysTrue(): File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:655, in pyarrow_to_schema(schema, name_mapping) 651 else: 652 raise ValueError( 653 "Parquet file does not have field-ids and the Iceberg table does not have 'schema.name-mapping.default' defined" 654 ) --> 655 return visit_pyarrow(schema, visitor) File ~/.pyenv/versions/3.11.7/lib/python3.11/functools.py:909, in singledispatch.<locals>.wrapper(*args, **kw) 905 if not args: 906 raise TypeError(f'{funcname} requires at least ' 907 '1 positional argument') --> 909 return dispatch(args[0].__class__)(*args, **kw) File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:676, in _(obj, visitor) 674 @visit_pyarrow.register(pa.Schema) 675 def _(obj: 
pa.Schema, visitor: PyArrowSchemaVisitor[T]) -> T: --> 676 return visitor.schema(obj, visit_pyarrow(pa.struct(obj), visitor)) File ~/.pyenv/versions/3.11.7/lib/python3.11/functools.py:909, in singledispatch.<locals>.wrapper(*args, **kw) 905 if not args: 906 raise TypeError(f'{funcname} requires at least ' 907 '1 positional argument') --> 909 return dispatch(args[0].__class__)(*args, **kw) File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:685, in _(obj, visitor) 683 for field in obj: 684 visitor.before_field(field) --> 685 result = visit_pyarrow(field.type, visitor) 686 results.append(visitor.field(field, result)) 687 visitor.after_field(field) File ~/.pyenv/versions/3.11.7/lib/python3.11/functools.py:909, in singledispatch.<locals>.wrapper(*args, **kw) 905 if not args: 906 raise TypeError(f'{funcname} requires at least ' 907 '1 positional argument') --> 909 return dispatch(args[0].__class__)(*args, **kw) File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:718, in _(obj, visitor) 716 if pa.types.is_nested(obj): 717 raise TypeError(f"Expected primitive type, got: {type(obj)}") --> 718 return visitor.primitive(obj) File ~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:891, in _ConvertToIceberg.primitive(self, primitive) 888 primitive = cast(pa.FixedSizeBinaryType, primitive) 889 return FixedType(primitive.byte_width) --> 891 raise TypeError(f"Unsupported type: {primitive}") TypeError: Unsupported type: timestamp[ns]` After some debugging, at [this](https://github.com/apache/iceberg-python/blob/6989b92c2d449beb9fe4817c64f619ea5bfc81dc/pyiceberg/io/pyarrow.py#L961) line I find `ipdb> physical_schema player_last_session: timestamp[ns] ... 
subject_last_session: timestamp[ns] ` I imagine the fix is to do something like [this](https://stackoverflow.com/a/72111737/22359351) on [this](https://github.com/apache/iceberg-python/blob/6989b92c2d449beb9fe4817c64f619ea5bfc81dc/pyiceberg/io/pyarrow.py#L957) line, but currently those overrides are not exposed. Am I on the right track? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org