Xuanwo commented on code in PR #999: URL: https://github.com/apache/iceberg-rust/pull/999#discussion_r1967323466
########## crates/iceberg/src/lib.rs: ########## @@ -50,6 +50,87 @@ //! Ok(()) //! } //! ``` +//! +//! ## Fast append data to table +//! +//! ```rust, no_run +//! use std::sync::Arc; +//! +//! use arrow_array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray}; +//! use async_trait::async_trait; +//! use iceberg::io::{FileIO, FileIOBuilder}; +//! use iceberg::spec::DataFile; +//! use iceberg::transaction::Transaction; +//! use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; +//! use iceberg::writer::file_writer::location_generator::{ +//! DefaultFileNameGenerator, DefaultLocationGenerator, +//! }; +//! use iceberg::writer::file_writer::ParquetWriterBuilder; +//! use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; +//! use iceberg::{Catalog, Result, TableIdent}; +//! use iceberg_catalog_memory::MemoryCatalog; +//! use parquet::file::properties::WriterProperties; +//! #[tokio::main] +//! async fn main() -> Result<()> { +//! // Build your file IO. +//! let file_io = FileIOBuilder::new("memory").build()?; +//! // Connect to a catalog. +//! let catalog = MemoryCatalog::new(file_io, None); +//! // Load table from catalog. +//! let table = catalog +//! .load_table(&TableIdent::from_strs(["hello", "world"])?) +//! .await?; +//! +//! // Create the data file writer. +//! let schema: Arc<arrow_schema::Schema> = Arc::new( +//! table +//! .metadata() +//! .current_schema() +//! .as_ref() +//! .try_into() +//! .unwrap(), +//! ); +//! let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); +//! let file_name_generator = DefaultFileNameGenerator::new( +//! "test".to_string(), +//! None, +//! iceberg::spec::DataFileFormat::Parquet, +//! ); +//! let parquet_writer_builder = ParquetWriterBuilder::new( +//! WriterProperties::default(), +//! table.metadata().current_schema().clone(), +//! table.file_io().clone(), +//! location_generator.clone(), +//! file_name_generator.clone(), +//! ); +//! 
let data_file_writer_builder = DataFileWriterBuilder::new(parquet_writer_builder, None); +//! let mut data_file_writer = data_file_writer_builder.build().await.unwrap(); +//! +//! // Write new data. +//! let col1 = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); +//! let col2 = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4)]); +//! let col3 = BooleanArray::from(vec![Some(true), Some(false), None, Some(false)]); +//! let batch = RecordBatch::try_new(schema.clone(), vec![ +//! Arc::new(col1) as ArrayRef, +//! Arc::new(col2) as ArrayRef, +//! Arc::new(col3) as ArrayRef, +//! ]) +//! .unwrap(); +//! data_file_writer.write(batch.clone()).await.unwrap(); +//! +//! // Close writer and get the DataFile. +//! let data_file = data_file_writer.close().await.unwrap(); +//! +//! // Append the DataFile. +//! let tx = Transaction::new(&table); +//! let mut fast_append = tx.fast_append(None, vec![]).unwrap(); Review Comment: I'm a bit confused about the API: `fast_append`. Why does it exist? And is there a `slow_append`? ########## crates/iceberg/src/lib.rs: ########## @@ -50,6 +50,87 @@ //! Ok(()) //! } //! ``` +//! +//! ## Fast append data to table +//! +//! ```rust, no_run +//! use std::sync::Arc; +//! +//! use arrow_array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray}; +//! use async_trait::async_trait; +//! use iceberg::io::{FileIO, FileIOBuilder}; +//! use iceberg::spec::DataFile; +//! use iceberg::transaction::Transaction; +//! use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder; +//! use iceberg::writer::file_writer::location_generator::{ +//! DefaultFileNameGenerator, DefaultLocationGenerator, +//! }; +//! use iceberg::writer::file_writer::ParquetWriterBuilder; +//! use iceberg::writer::{IcebergWriter, IcebergWriterBuilder}; +//! use iceberg::{Catalog, Result, TableIdent}; +//! use iceberg_catalog_memory::MemoryCatalog; +//! use parquet::file::properties::WriterProperties; +//! #[tokio::main] +//! 
async fn main() -> Result<()> { +//!     // Build your file IO. +//!     let file_io = FileIOBuilder::new("memory").build()?; +//!     // Connect to a catalog. +//!     let catalog = MemoryCatalog::new(file_io, None); +//!     // Load table from catalog. +//!     let table = catalog +//!         .load_table(&TableIdent::from_strs(["hello", "world"])?) +//!         .await?; +//! +//!     // Create the data file writer. +//!     let schema: Arc<arrow_schema::Schema> = Arc::new( +//!         table +//!             .metadata() +//!             .current_schema() +//!             .as_ref() +//!             .try_into() +//!             .unwrap(), +//!     ); +//!     let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap(); Review Comment: We are returning `Result<()>` here, so how about using `?` here for cleaner code? ########## crates/iceberg/src/lib.rs: ########## @@ -50,6 +50,87 @@ //! Ok(()) //! } //! ``` +//! +//! ## Fast append data to table +//! +//! ```rust, no_run Review Comment: This test only uses the `memory` file io and `memory` catalog, so it should be able to run? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org