dev-goyal opened a new issue, #541:
URL: https://github.com/apache/iceberg-python/issues/541

   ### Apache Iceberg version
   
   0.6.0 (latest release)
   
   ### Please describe the bug 🐞
   
   Given a table like so:
   
   ```
   In [36]: table
   Out[36]:
   matches(
    ...
     14: player_last_session: optional timestamptz,
   ...
     30: subject_last_session: optional timestamptz,
   ),
   partition by: [run_date, player_agg_cluster_name, initiating_at],
   sort order: [],
   snapshot: Operation.APPEND: id=6595288807809068528, schema_id=0
   ```
   
   I get the following error when calling `table.scan().to_arrow()`:
   
   `In [25]: table.scan().to_arrow()
   ---------------------------------------------------------------------------
   TypeError                                 Traceback (most recent call last)
   Cell In[25], line 1
   ----> 1 table.scan().to_arrow()
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/table/__init__.py:1418,
 in DataScan.to_arrow(self)
      1415 def to_arrow(self) -> pa.Table:
      1416     from pyiceberg.io.pyarrow import project_table
   -> 1418     return project_table(
      1419         self.plan_files(),
      1420         self.table,
      1421         self.row_filter,
      1422         self.projection(),
      1423         case_sensitive=self.case_sensitive,
      1424         limit=self.limit,
      1425     )
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:1114,
 in project_table(tasks, table, row_filter, projected_schema, case_sensitive, 
limit)
      1111 if limit is not None:
      1112     _ = [f.cancel() for f in futures if not f.done()]
   -> 1114 tables = [f.result() for f in completed_futures if f.result()]
      1116 if len(tables) < 1:
      1117     return pa.Table.from_batches([], 
schema=schema_to_pyarrow(projected_schema))
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:1114,
 in <listcomp>(.0)
      1111 if limit is not None:
      1112     _ = [f.cancel() for f in futures if not f.done()]
   -> 1114 tables = [f.result() for f in completed_futures if f.result()]
      1116 if len(tables) < 1:
      1117     return pa.Table.from_batches([], 
schema=schema_to_pyarrow(projected_schema))
   
   File 
~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/_base.py:449, in 
Future.result(self, timeout)
       447     raise CancelledError()
       448 elif self._state == FINISHED:
   --> 449     return self.__get_result()
       451 self._condition.wait(timeout)
       453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
   
   File 
~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/_base.py:401, in 
Future.__get_result(self)
       399 if self._exception:
       400     try:
   --> 401         raise self._exception
       402     finally:
       403         # Break a reference cycle with the exception in 
self._exception
       404         self = None
   
   File 
~/.pyenv/versions/3.11.7/lib/python3.11/concurrent/futures/thread.py:58, in 
_WorkItem.run(self)
        55     return
        57 try:
   ---> 58     result = self.fn(*self.args, **self.kwargs)
        59 except BaseException as exc:
        60     self.future.set_exception(exc)
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:957,
 in _task_to_table(fs, task, bound_row_filter, projected_schema, 
projected_field_ids, positional_deletes, case_sensitive, row_counts, limit, 
name_mapping)
       954 if metadata := physical_schema.metadata:
       955     schema_raw = metadata.get(ICEBERG_SCHEMA)
       956 file_schema = (
   --> 957     Schema.model_validate_json(schema_raw) if schema_raw is not None 
else pyarrow_to_schema(physical_schema, name_mapping)
       958 )
       960 pyarrow_filter = None
       961 if bound_row_filter is not AlwaysTrue():
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:655,
 in pyarrow_to_schema(schema, name_mapping)
       651 else:
       652     raise ValueError(
       653         "Parquet file does not have field-ids and the Iceberg table 
does not have 'schema.name-mapping.default' defined"
       654     )
   --> 655 return visit_pyarrow(schema, visitor)
   
   File ~/.pyenv/versions/3.11.7/lib/python3.11/functools.py:909, in 
singledispatch.<locals>.wrapper(*args, **kw)
       905 if not args:
       906     raise TypeError(f'{funcname} requires at least '
       907                     '1 positional argument')
   --> 909 return dispatch(args[0].__class__)(*args, **kw)
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:676,
 in _(obj, visitor)
       674 @visit_pyarrow.register(pa.Schema)
       675 def _(obj: pa.Schema, visitor: PyArrowSchemaVisitor[T]) -> T:
   --> 676     return visitor.schema(obj, visit_pyarrow(pa.struct(obj), 
visitor))
   
   File ~/.pyenv/versions/3.11.7/lib/python3.11/functools.py:909, in 
singledispatch.<locals>.wrapper(*args, **kw)
       905 if not args:
       906     raise TypeError(f'{funcname} requires at least '
       907                     '1 positional argument')
   --> 909 return dispatch(args[0].__class__)(*args, **kw)
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:685,
 in _(obj, visitor)
       683 for field in obj:
       684     visitor.before_field(field)
   --> 685     result = visit_pyarrow(field.type, visitor)
       686     results.append(visitor.field(field, result))
       687     visitor.after_field(field)
   
   File ~/.pyenv/versions/3.11.7/lib/python3.11/functools.py:909, in 
singledispatch.<locals>.wrapper(*args, **kw)
       905 if not args:
       906     raise TypeError(f'{funcname} requires at least '
       907                     '1 positional argument')
   --> 909 return dispatch(args[0].__class__)(*args, **kw)
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:718,
 in _(obj, visitor)
       716 if pa.types.is_nested(obj):
       717     raise TypeError(f"Expected primitive type, got: {type(obj)}")
   --> 718 return visitor.primitive(obj)
   
   File 
~/.pyenv/versions/3.11.7/envs/ml/lib/python3.11/site-packages/pyiceberg/io/pyarrow.py:891,
 in _ConvertToIceberg.primitive(self, primitive)
       888     primitive = cast(pa.FixedSizeBinaryType, primitive)
       889     return FixedType(primitive.byte_width)
   --> 891 raise TypeError(f"Unsupported type: {primitive}")
   
   TypeError: Unsupported type: timestamp[ns]`
   
   After some debugging, I found that at 
[this line](https://github.com/apache/iceberg-python/blob/6989b92c2d449beb9fe4817c64f619ea5bfc81dc/pyiceberg/io/pyarrow.py#L961)
 `physical_schema` contains nanosecond-precision timestamps:
   
   ```
   ipdb> physical_schema
   player_last_session: timestamp[ns]
   ...
   subject_last_session: timestamp[ns]
   ```
   I imagine the fix is to do something like 
[this Stack Overflow answer](https://stackoverflow.com/a/72111737/22359351) at 
[this line](https://github.com/apache/iceberg-python/blob/6989b92c2d449beb9fe4817c64f619ea5bfc81dc/pyiceberg/io/pyarrow.py#L957),
 but those overrides are not currently exposed. Am I on the right track?
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to