Re: [PR] feat: Introduce basic file scan planning. [iceberg-rust]

via GitHub Wed, 03 Jan 2024 19:36:29 -0800


liurenjie1024 commented on code in PR #129:
URL: https://github.com/apache/iceberg-rust/pull/129#discussion_r1441250114



##########
crates/iceberg/src/scan.rs:
##########
@@ -0,0 +1,616 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Table scan api.
+
+use crate::io::FileIO;
+use crate::spec::{
+    DataContentType, ManifestContentType, ManifestEntry, ManifestEntryRef, 
SchemaRef, SnapshotRef,
+    TableMetadataRef, INITIAL_SEQUENCE_NUMBER,
+};
+use crate::table::Table;
+use crate::{Error, ErrorKind};
+use arrow_array::RecordBatch;
+use futures::stream::{iter, BoxStream};
+use futures::StreamExt;
+
+/// Builder to create table scan.
+pub struct TableScanBuilder<'a> {
+    table: &'a Table,
+    // Empty column names means to select all columns
+    column_names: Vec<String>,
+    snapshot_id: Option<i64>,
+}
+
+impl<'a> TableScanBuilder<'a> {
+    pub fn new(table: &'a Table) -> Self {
+        Self {
+            table,
+            column_names: vec![],
+            snapshot_id: None,
+        }
+    }
+
+    /// Select all columns.
+    pub fn select_all(mut self) -> Self {
+        self.column_names.clear();
+        self
+    }
+
+    /// Select some columns of the table.
+    pub fn select(mut self, column_names: impl IntoIterator<Item = impl 
ToString>) -> Self {
+        self.column_names = column_names
+            .into_iter()
+            .map(|item| item.to_string())
+            .collect();
+        self
+    }
+
+    /// Set the snapshot to scan. When not set, it uses current snapshot.
+    pub fn snapshot_id(mut self, snapshot_id: i64) -> Self {
+        self.snapshot_id = Some(snapshot_id);
+        self
+    }
+
+    /// Build the table scan.
+    pub fn build(self) -> crate::Result<TableScan> {
+        let snapshot = match self.snapshot_id {
+            Some(snapshot_id) => self
+                .table
+                .metadata()
+                .snapshot_by_id(snapshot_id)
+                .ok_or_else(|| {
+                    Error::new(
+                        ErrorKind::DataInvalid,
+                        format!("Snapshot with id {} not found", snapshot_id),
+                    )
+                })?
+                .clone(),
+            None => self
+                .table
+                .metadata()
+                .current_snapshot()
+                .ok_or_else(|| {
+                    Error::new(
+                        ErrorKind::FeatureUnsupported,
+                        "Can't scan table without snapshots",
+                    )
+                })?
+                .clone(),
+        };
+
+        let schema = snapshot.schema(self.table.metadata())?;
+
+        // Check that all column names exist in the schema.
+        if !self.column_names.is_empty() {
+            for column_name in &self.column_names {
+                if schema.field_by_name(column_name).is_none() {
+                    return Err(Error::new(
+                        ErrorKind::DataInvalid,
+                        format!("Column {} not found in table.", column_name),
+                    ));
+                }
+            }
+        }
+
+        Ok(TableScan {
+            snapshot,
+            file_io: self.table.file_io().clone(),
+            table_metadata: self.table.metadata_ref(),
+            column_names: self.column_names,
+            schema,
+        })
+    }
+}
+
+/// Table scan.
+#[derive(Debug)]
+#[allow(dead_code)]
+pub struct TableScan {
+    snapshot: SnapshotRef,
+    table_metadata: TableMetadataRef,
+    file_io: FileIO,
+    column_names: Vec<String>,
+    schema: SchemaRef,
+}
+
+/// A stream of [`FileScanTask`].
+pub type FileScanTaskStream = BoxStream<'static, crate::Result<FileScanTask>>;
+
+impl TableScan {
+    /// Returns a stream of file scan tasks.
+    pub async fn plan_files(&self) -> crate::Result<FileScanTaskStream> {
+        let manifest_list = self
+            .snapshot
+            .load_manifest_list(&self.file_io, &self.table_metadata)
+            .await?;
+
+        // Get minimum sequence number of data files.
+        let min_data_file_seq_num = manifest_list
+            .entries()
+            .iter()
+            .filter(|e| e.content == ManifestContentType::Data)
+            .map(|e| e.min_sequence_number)
+            .min()
+            .unwrap_or(INITIAL_SEQUENCE_NUMBER);
+
+        // Collect deletion files first.
+        let mut position_delete_files = 
Vec::with_capacity(manifest_list.entries().len());
+        let mut eq_delete_files = 
Vec::with_capacity(manifest_list.entries().len());
+
+        // TODO: We should introduce runtime api to enable parallel scan.
+        for manifest_list_entry in manifest_list.entries().iter().filter(|e| {
+            e.content == ManifestContentType::Deletes && e.sequence_number >= 
min_data_file_seq_num
+        }) {
+            let manifest_file = 
manifest_list_entry.load_manifest(&self.file_io).await?;
+
+            for manifest_entry in manifest_file.entries().iter().filter(|e| 
e.is_alive()) {
+                match manifest_entry.content_type() {
+                    DataContentType::PositionDeletes => {
+                        position_delete_files.push(manifest_entry.clone());
+                    }
+                    DataContentType::EqualityDeletes => {
+                        eq_delete_files.push(manifest_entry.clone());
+                    }
+                    DataContentType::Data => {
+                        return Err(Error::new(
+                            ErrorKind::DataInvalid,
+                            format!(
+                                "Data file entry({}) found in delete manifest 
file({})",
+                                manifest_entry.file_path(),
+                                manifest_list_entry.manifest_path
+                            ),
+                        ));
+                    }
+                }
+            }
+        }
+
+        // Sort delete files by sequence number.
+        position_delete_files
+            .sort_by_key(|f| 
f.sequence_number().unwrap_or(INITIAL_SEQUENCE_NUMBER));
+        eq_delete_files.sort_by_key(|f| 
f.sequence_number().unwrap_or(INITIAL_SEQUENCE_NUMBER));
+
+        // Generate data file stream
+        let mut file_scan_tasks = 
Vec::with_capacity(manifest_list.entries().len());
+        for manifest_list_entry in manifest_list
+            .entries()
+            .iter()
+            .filter(|e| e.content == ManifestContentType::Data)
+        {
+            // Data file
+            let manifest = 
manifest_list_entry.load_manifest(&self.file_io).await?;
+
+            for manifest_entry in manifest.entries() {
+                if manifest_entry.is_alive() {
+                    file_scan_tasks.push(Ok(FileScanTask {
+                        data_file: manifest_entry.clone(),
+                        position_delete_files: 
TableScan::filter_position_delete_files(
+                            manifest_entry,
+                            &position_delete_files,
+                        ),
+                        eq_delete_files: TableScan::filter_eq_delete_files(
+                            manifest_entry,
+                            &eq_delete_files,
+                        ),
+                        start: 0,
+                        length: manifest_entry.file_size_in_bytes(),
+                    }));
+                }
+            }
+        }
+
+        Ok(iter(file_scan_tasks).boxed())
+    }
+
+    /// Return the position delete files that should be applied to the data 
file.
+    ///
+    /// Here we assume that the position delete files are sorted by sequence 
number in ascending order.
+    fn filter_position_delete_files(
+        data_file: &ManifestEntry,
+        position_deletes: &[ManifestEntryRef],
+    ) -> Vec<ManifestEntryRef> {
+        let data_seq_num = data_file
+            .sequence_number()
+            .unwrap_or(INITIAL_SEQUENCE_NUMBER);
+
+        // Find the first position delete file whose sequence number is 
greater than or equal to the data file.
+        let first_entry = position_deletes.partition_point(|e| {
+            e.sequence_number().unwrap_or(INITIAL_SEQUENCE_NUMBER) < 
data_seq_num

Review Comment:
   This code no longer exists after removing support for deletion files.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Re: [PR] feat: Introduce basic file scan planning. [iceberg-rust]

Reply via email to