Re: [PR] feat(datafusion): implement the project node to add the partition columns [iceberg-rust]

via GitHub Sat, 18 Oct 2025 02:18:13 -0700


fvaleye commented on code in PR #1602:
URL: https://github.com/apache/iceberg-rust/pull/1602#discussion_r2379156968



##########
crates/integrations/datafusion/src/physical_plan/project.rs:
##########
@@ -15,125 +15,203 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Utilities for calculating partition values for Iceberg tables.
-//!
-//! This module provides functions to calculate partition values from record batches
-//! based on Iceberg partition specifications. These utilities are used when writing
-//! data to partitioned Iceberg tables.
+//! Partition value projection for Iceberg tables.
 
 use std::sync::Arc;
 
 use datafusion::arrow::array::{ArrayRef, RecordBatch, StructArray};
-use datafusion::arrow::datatypes::{
-    DataType, Field, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef,
-};
+use datafusion::arrow::datatypes::{DataType, Schema as ArrowSchema};
 use datafusion::common::Result as DFResult;
 use datafusion::error::DataFusionError;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_expr::expressions::Column;
+use datafusion::physical_plan::projection::ProjectionExec;
+use datafusion::physical_plan::{ColumnarValue, ExecutionPlan};
 use iceberg::spec::{PartitionSpec, Schema};
+use iceberg::table::Table;
 
 use crate::to_datafusion_error;
 
 /// Column name for the combined partition values struct
-#[allow(dead_code)]
-pub(crate) const PARTITION_VALUES_COLUMN: &str = "_iceberg_partition_values";
+const PARTITION_VALUES_COLUMN: &str = "_partition";
 
-/// Create an output schema by adding a single partition values struct column to the input schema.
-/// Returns the original schema unchanged if the table is unpartitioned.
+/// Extends an ExecutionPlan with partition value calculations for Iceberg tables.
+///
+/// This function takes an input ExecutionPlan and extends it with an additional column
+/// containing calculated partition values based on the table's partition specification.
+/// For unpartitioned tables, returns the original plan unchanged.
+///
+/// # Arguments
+/// * `input` - The input ExecutionPlan to extend
+/// * `table` - The Iceberg table with partition specification
+///
+/// # Returns
+/// * `Ok(Arc<dyn ExecutionPlan>)` - Extended plan with partition values column
+/// * `Err` - If partition spec is not found or transformation fails
 #[allow(dead_code)]
-pub(crate) fn create_schema_with_partition_columns(
-    input_schema: &ArrowSchema,
-    partition_spec: &PartitionSpec,
-    table_schema: &Schema,
-) -> DFResult<ArrowSchemaRef> {
+pub fn project_with_partition(
+    input: Arc<dyn ExecutionPlan>,
+    table: &Table,
+) -> DFResult<Arc<dyn ExecutionPlan>> {
+    let metadata = table.metadata();
+    let partition_spec = metadata

Review Comment:
   Yes! Thx!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] feat(datafusion): implement the project node to add the partition columns [iceberg-rust]

Reply via email to