kevinjqliu commented on code in PR #1983:
URL: https://github.com/apache/iceberg-python/pull/1983#discussion_r2081953874

##########
pyiceberg/io/pyarrow.py:
##########
@@ -636,7 +636,13 @@ def visit_fixed(self, fixed_type: FixedType) -> pa.DataType:
         return pa.binary(len(fixed_type))

     def visit_decimal(self, decimal_type: DecimalType) -> pa.DataType:
-        return pa.decimal128(decimal_type.precision, decimal_type.scale)
+        return (
+            pa.decimal32(decimal_type.precision, decimal_type.scale)
+            if decimal_type.precision <= 9
+            else pa.decimal64(decimal_type.precision, decimal_type.scale)
+            if decimal_type.precision <= 18
+            else pa.decimal128(decimal_type.precision, decimal_type.scale)

Review Comment:
   """
   Scale is fixed, precision must be 38 or less
   """
   from https://iceberg.apache.org/spec/#primitive-types


##########
pyiceberg/io/pyarrow.py:
##########
@@ -2442,7 +2448,9 @@ def write_parquet(task: WriteTask) -> DataFile:
         )
         fo = io.new_output(file_path)
         with fo.create(overwrite=True) as fos:
-            with pq.ParquetWriter(fos, schema=arrow_table.schema, **parquet_writer_kwargs) as writer:
+            with pq.ParquetWriter(
+                fos, schema=arrow_table.schema, store_decimal_as_integer=True, **parquet_writer_kwargs

Review Comment:
   """
   By default, this is DISABLED and all decimal types annotate fixed_len_byte_array. When enabled, the writer will use the following physical types to store decimals:
   - int32: for 1 <= precision <= 9.
   - int64: for 10 <= precision <= 18.
   - fixed_len_byte_array: for precision > 18.
   """
   from https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html


##########
pyiceberg/io/pyarrow.py:
##########
@@ -636,7 +636,13 @@ def visit_fixed(self, fixed_type: FixedType) -> pa.DataType:
         return pa.binary(len(fixed_type))

     def visit_decimal(self, decimal_type: DecimalType) -> pa.DataType:
-        return pa.decimal128(decimal_type.precision, decimal_type.scale)
+        return (
+            pa.decimal32(decimal_type.precision, decimal_type.scale)
+            if decimal_type.precision <= 9
+            else pa.decimal64(decimal_type.precision, decimal_type.scale)
+            if decimal_type.precision <= 18
+            else pa.decimal128(decimal_type.precision, decimal_type.scale)

Review Comment:
   pyarrow.decimal128 supports up to precision 38
   https://arrow.apache.org/docs/python/generated/pyarrow.decimal128.html#pyarrow-decimal128


##########
pyiceberg/io/pyarrow.py:
##########
@@ -2442,7 +2448,9 @@ def write_parquet(task: WriteTask) -> DataFile:
         )
         fo = io.new_output(file_path)
         with fo.create(overwrite=True) as fos:
-            with pq.ParquetWriter(fos, schema=arrow_table.schema, **parquet_writer_kwargs) as writer:
+            with pq.ParquetWriter(
+                fos, schema=arrow_table.schema, store_decimal_as_integer=True, **parquet_writer_kwargs

Review Comment:
   this matches the parquet data type mapping for decimal
   https://iceberg.apache.org/spec/#parquet


--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org