ZENOTME commented on code in PR #837:
URL: https://github.com/apache/iceberg-rust/pull/837#discussion_r1897104456
##########
crates/iceberg/src/writer/file_writer/parquet_writer.rs:
##########
@@ -1169,4 +1172,187 @@ mod tests {
Ok(())
}
+
+ #[tokio::test]
+ async fn test_decimal_bound() -> Result<()> {
+ let temp_dir = TempDir::new().unwrap();
+ let file_io = FileIOBuilder::new_fs_io().build().unwrap();
+ let loccation_gen =
+
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
+ let file_name_gen =
+ DefaultFileNameGenerator::new("test".to_string(), None,
DataFileFormat::Parquet);
+
+ // test 1.1 and 2.2
+ let schema = Arc::new(
+ Schema::builder()
+ .with_fields(vec![NestedField::optional(
+ 0,
+ "decimal",
+ Type::Primitive(PrimitiveType::Decimal {
+ precision: 28,
+ scale: 10,
+ }),
+ )
+ .into()])
+ .build()
+ .unwrap(),
+ );
+ let arrow_schema: ArrowSchemaRef =
Arc::new(schema_to_arrow_schema(&schema).unwrap());
+ let mut pw = ParquetWriterBuilder::new(
+ WriterProperties::builder().build(),
+ schema.clone(),
+ file_io.clone(),
+ loccation_gen.clone(),
+ file_name_gen.clone(),
+ )
+ .build()
+ .await?;
+ let col0 = Arc::new(
+ Decimal128Array::from(vec![Some(22000000000), Some(11000000000)])
+ .with_data_type(DataType::Decimal128(28, 10)),
+ ) as ArrayRef;
+ let to_write = RecordBatch::try_new(arrow_schema.clone(),
vec![col0]).unwrap();
+ pw.write(&to_write).await?;
+ let res = pw.close().await?;
+ assert_eq!(res.len(), 1);
+ let data_file = res
+ .into_iter()
+ .next()
+ .unwrap()
+ .content(crate::spec::DataContentType::Data)
+ .partition(Struct::empty())
+ .build()
+ .unwrap();
+ assert_eq!(
+ data_file.upper_bounds().get(&0),
+ Some(Datum::decimal_with_precision(Decimal::new(22000000000_i64,
10), 28).unwrap())
+ .as_ref()
+ );
+ assert_eq!(
+ data_file.lower_bounds().get(&0),
+ Some(Datum::decimal_with_precision(Decimal::new(11000000000_i64,
10), 28).unwrap())
+ .as_ref()
+ );
+
+ // test -1.1 and -2.2
+ let schema = Arc::new(
+ Schema::builder()
+ .with_fields(vec![NestedField::optional(
+ 0,
+ "decimal",
+ Type::Primitive(PrimitiveType::Decimal {
+ precision: 28,
+ scale: 10,
+ }),
+ )
+ .into()])
+ .build()
+ .unwrap(),
+ );
+ let arrow_schema: ArrowSchemaRef =
Arc::new(schema_to_arrow_schema(&schema).unwrap());
+ let mut pw = ParquetWriterBuilder::new(
+ WriterProperties::builder().build(),
+ schema.clone(),
+ file_io.clone(),
+ loccation_gen.clone(),
+ file_name_gen.clone(),
+ )
+ .build()
+ .await?;
+ let col0 = Arc::new(
+ Decimal128Array::from(vec![Some(-22000000000), Some(-11000000000)])
+ .with_data_type(DataType::Decimal128(28, 10)),
+ ) as ArrayRef;
+ let to_write = RecordBatch::try_new(arrow_schema.clone(),
vec![col0]).unwrap();
+ pw.write(&to_write).await?;
+ let res = pw.close().await?;
+ assert_eq!(res.len(), 1);
+ let data_file = res
+ .into_iter()
+ .next()
+ .unwrap()
+ .content(crate::spec::DataContentType::Data)
+ .partition(Struct::empty())
+ .build()
+ .unwrap();
+ assert_eq!(
+ data_file.upper_bounds().get(&0),
+ Some(Datum::decimal_with_precision(Decimal::new(-11000000000_i64,
10), 28).unwrap())
+ .as_ref()
+ );
+ assert_eq!(
+ data_file.lower_bounds().get(&0),
+ Some(Datum::decimal_with_precision(Decimal::new(-22000000000_i64,
10), 28).unwrap())
+ .as_ref()
+ );
+
+ // test max and min for scale 38
+ let schema = Arc::new(
+ Schema::builder()
+ .with_fields(vec![NestedField::optional(
+ 0,
+ "decimal",
+ Type::Primitive(PrimitiveType::Decimal {
+ precision: 38,
+ scale: 0,
+ }),
+ )
+ .into()])
+ .build()
+ .unwrap(),
+ );
+ let arrow_schema: ArrowSchemaRef =
Arc::new(schema_to_arrow_schema(&schema).unwrap());
+ let mut pw = ParquetWriterBuilder::new(
+ WriterProperties::builder().build(),
+ schema,
+ file_io.clone(),
+ loccation_gen,
+ file_name_gen,
+ )
+ .build()
+ .await?;
+ let col0 = Arc::new(
+ Decimal128Array::from(vec![
+ Some(99999999999999999999999999999999999999_i128),
+ Some(-99999999999999999999999999999999999999_i128),
+ ])
+ .with_data_type(DataType::Decimal128(38, 0)),
+ ) as ArrayRef;
+ let to_write = RecordBatch::try_new(arrow_schema.clone(),
vec![col0]).unwrap();
+ pw.write(&to_write).await?;
+ let res = pw.close().await?;
+ assert_eq!(res.len(), 1);
+ let data_file = res
+ .into_iter()
+ .next()
+ .unwrap()
+ .content(crate::spec::DataContentType::Data)
+ .partition(Struct::empty())
+ .build()
+ .unwrap();
+ assert_eq!(
+ data_file.upper_bounds().get(&0),
+ Some(Datum::new(
+ PrimitiveType::Decimal {
+ precision: 38,
+ scale: 0
+ },
+
PrimitiveLiteral::Int128(99999999999999999999999999999999999999_i128)
Review Comment:
Actually, as I understand it, our decimal is not restricted by
`rust_decimal`. 🤔 Internally, Iceberg has its own decimal representation.
Here we just provide a way for the user to pass a `rust_decimal` value and
convert it into our decimal representation. If the limitation of
`rust_decimal` is that it only supports up to 28 digits of precision, then
users who need more precision can pass a value in another format and have it
converted into our decimal representation. Or, as in #669, we can provide a
way to convert other decimal types that support more precision into our
decimal representation.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]