Copilot commented on code in PR #18176:
URL: https://github.com/apache/datafusion/pull/18176#discussion_r2445332864


##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -232,6 +226,177 @@ impl From<(Arc<dyn PhysicalExpr>, String)> for 
ProjectionExpr {
     }
 }
 
+impl From<&(Arc<dyn PhysicalExpr>, String)> for ProjectionExpr {
+    fn from(value: &(Arc<dyn PhysicalExpr>, String)) -> Self {
+        Self::new(Arc::clone(&value.0), value.1.clone())
+    }
+}
+
+impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
+    fn from(value: ProjectionExpr) -> Self {
+        (value.expr, value.alias)
+    }
+}
+
+/// A collection of projection expressions.
+///
+/// This struct encapsulates multiple `ProjectionExpr` instances,
+/// representing a complete projection operation and provides
+/// methods to manipulate and analyze the projection as a whole.
+#[derive(Debug, Clone)]
+pub struct Projection {
+    exprs: Vec<ProjectionExpr>,
+}
+
+impl From<Vec<ProjectionExpr>> for Projection {
+    fn from(value: Vec<ProjectionExpr>) -> Self {
+        Self { exprs: value }
+    }
+}
+
+impl From<&[ProjectionExpr]> for Projection {
+    fn from(value: &[ProjectionExpr]) -> Self {
+        Self {
+            exprs: value.to_vec(),
+        }
+    }
+}
+
+impl AsRef<[ProjectionExpr]> for Projection {
+    fn as_ref(&self) -> &[ProjectionExpr] {
+        &self.exprs
+    }
+}
+
+impl Projection {
+    pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
+        Self { exprs }
+    }
+
+    /// Apply another projection on top of this projection, returning the 
combined projection.
+    /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as 
z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`,
+    /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0 
as c2`.
+    pub fn try_merge(&self, other: &Projection) -> Result<Projection> {
+        let mut new_exprs = Vec::with_capacity(other.exprs.len());
+        for proj_expr in &other.exprs {
+            let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)?
+                .ok_or_else(|| {
+                    internal_datafusion_err!(
+                        "Failed to combine projections: expression {} could 
not be updated",
+                        proj_expr.expr
+                    )
+                })?;
+            new_exprs.push(ProjectionExpr {
+                expr: new_expr,
+                alias: proj_expr.alias.clone(),
+            });
+        }
+        Ok(Projection::new(new_exprs))
+    }
+
+    /// Merge an iterator of projections into a single projection.
+    /// For example, if the projections are:
+    /// 1. `SELECT c@2 AS x, b@1 AS y, a@0 as z`
+    /// 2. `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`
+    /// 3. `SELECT c1@0 * 2 AS final_c1`
+    /// we return a projection equivalent to `SELECT (c@2 + 1) * 2 AS 
final_c1`.
+    pub fn try_merge_iter<I>(projections: I) -> Result<Projection>
+    where
+        I: IntoIterator<Item = Projection>,
+    {
+        let mut iter = projections.into_iter();
+        let first = iter
+            .next()
+            .ok_or_else(|| internal_datafusion_err!("No projections to 
merge"))?;
+        iter.try_fold(first, |acc, proj| acc.try_merge(&proj))
+    }
+
+    /// Extract the column indices used in this projection.
+    /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a` 
is at index 0 and `b` is at index 1,
+    /// this function would return `[0, 1]`.
+    /// Repeated indices are returned only once, and the order is ascending.
+    pub fn column_indices(&self) -> Vec<usize> {
+        self.exprs
+            .iter()
+            .map(|e| collect_columns(&e.expr).into_iter().map(|col| 
col.index()))
+            .flatten()

Review Comment:
   Replace `.flatten()` with `.flat_map()` for better performance and 
readability. The current two-step approach of `.map().flatten()` can be 
combined into a single `.flat_map()` operation.
   ```suggestion
               .flat_map(|e| collect_columns(&e.expr).into_iter().map(|col| 
col.index()))
   ```



##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -232,6 +226,177 @@ impl From<(Arc<dyn PhysicalExpr>, String)> for 
ProjectionExpr {
     }
 }
 
+impl From<&(Arc<dyn PhysicalExpr>, String)> for ProjectionExpr {
+    fn from(value: &(Arc<dyn PhysicalExpr>, String)) -> Self {
+        Self::new(Arc::clone(&value.0), value.1.clone())
+    }
+}
+
+impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
+    fn from(value: ProjectionExpr) -> Self {
+        (value.expr, value.alias)
+    }
+}
+
+/// A collection of projection expressions.
+///
+/// This struct encapsulates multiple `ProjectionExpr` instances,
+/// representing a complete projection operation and provides
+/// methods to manipulate and analyze the projection as a whole.
+#[derive(Debug, Clone)]
+pub struct Projection {
+    exprs: Vec<ProjectionExpr>,
+}
+
+impl From<Vec<ProjectionExpr>> for Projection {
+    fn from(value: Vec<ProjectionExpr>) -> Self {
+        Self { exprs: value }
+    }
+}
+
+impl From<&[ProjectionExpr]> for Projection {
+    fn from(value: &[ProjectionExpr]) -> Self {
+        Self {
+            exprs: value.to_vec(),
+        }
+    }
+}
+
+impl AsRef<[ProjectionExpr]> for Projection {
+    fn as_ref(&self) -> &[ProjectionExpr] {
+        &self.exprs
+    }
+}
+
+impl Projection {
+    pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
+        Self { exprs }
+    }
+
+    /// Apply another projection on top of this projection, returning the 
combined projection.
+    /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as 
z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`,
+    /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0 
as c2`.
+    pub fn try_merge(&self, other: &Projection) -> Result<Projection> {
+        let mut new_exprs = Vec::with_capacity(other.exprs.len());
+        for proj_expr in &other.exprs {
+            let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)?
+                .ok_or_else(|| {
+                    internal_datafusion_err!(
+                        "Failed to combine projections: expression {} could 
not be updated",
+                        proj_expr.expr
+                    )
+                })?;
+            new_exprs.push(ProjectionExpr {
+                expr: new_expr,
+                alias: proj_expr.alias.clone(),
+            });
+        }
+        Ok(Projection::new(new_exprs))
+    }
+
+    /// Merge an iterator of projections into a single projection.
+    /// For example, if the projections are:
+    /// 1. `SELECT c@2 AS x, b@1 AS y, a@0 as z`
+    /// 2. `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`
+    /// 3. `SELECT c1@0 * 2 AS final_c1`
+    /// we return a projection equivalent to `SELECT (c@2 + 1) * 2 AS 
final_c1`.
+    pub fn try_merge_iter<I>(projections: I) -> Result<Projection>
+    where
+        I: IntoIterator<Item = Projection>,
+    {
+        let mut iter = projections.into_iter();
+        let first = iter
+            .next()
+            .ok_or_else(|| internal_datafusion_err!("No projections to 
merge"))?;
+        iter.try_fold(first, |acc, proj| acc.try_merge(&proj))
+    }
+
+    /// Extract the column indices used in this projection.
+    /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a` 
is at index 0 and `b` is at index 1,
+    /// this function would return `[0, 1]`.
+    /// Repeated indices are returned only once, and the order is ascending.
+    pub fn column_indices(&self) -> Vec<usize> {
+        self.exprs
+            .iter()
+            .map(|e| collect_columns(&e.expr).into_iter().map(|col| 
col.index()))
+            .flatten()
+            .sorted_unstable()
+            .dedup()
+            .collect_vec()

Review Comment:
   The combination of `.sorted_unstable().dedup()` is inefficient. Consider 
using `.unique()` from itertools (already imported) which handles both 
deduplication and sorting in a single pass, or use a `HashSet` to collect 
unique indices and then sort once.
   ```suggestion
           use std::collections::HashSet;
           let mut indices: HashSet<usize> = self.exprs
               .iter()
               .flat_map(|e| collect_columns(&e.expr).into_iter().map(|col| 
col.index()))
               .collect();
           let mut indices_vec: Vec<usize> = indices.into_iter().collect();
           indices_vec.sort_unstable();
           indices_vec
   ```



##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -232,6 +226,177 @@ impl From<(Arc<dyn PhysicalExpr>, String)> for 
ProjectionExpr {
     }
 }
 
+impl From<&(Arc<dyn PhysicalExpr>, String)> for ProjectionExpr {
+    fn from(value: &(Arc<dyn PhysicalExpr>, String)) -> Self {
+        Self::new(Arc::clone(&value.0), value.1.clone())
+    }
+}
+
+impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
+    fn from(value: ProjectionExpr) -> Self {
+        (value.expr, value.alias)
+    }
+}
+
+/// A collection of projection expressions.
+///
+/// This struct encapsulates multiple `ProjectionExpr` instances,
+/// representing a complete projection operation and provides
+/// methods to manipulate and analyze the projection as a whole.
+#[derive(Debug, Clone)]
+pub struct Projection {
+    exprs: Vec<ProjectionExpr>,
+}
+
+impl From<Vec<ProjectionExpr>> for Projection {
+    fn from(value: Vec<ProjectionExpr>) -> Self {
+        Self { exprs: value }
+    }
+}
+
+impl From<&[ProjectionExpr]> for Projection {
+    fn from(value: &[ProjectionExpr]) -> Self {
+        Self {
+            exprs: value.to_vec(),
+        }
+    }
+}
+
+impl AsRef<[ProjectionExpr]> for Projection {
+    fn as_ref(&self) -> &[ProjectionExpr] {
+        &self.exprs
+    }
+}
+
+impl Projection {
+    pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
+        Self { exprs }
+    }
+
+    /// Apply another projection on top of this projection, returning the 
combined projection.
+    /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as 
z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`,
+    /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0 
as c2`.
+    pub fn try_merge(&self, other: &Projection) -> Result<Projection> {
+        let mut new_exprs = Vec::with_capacity(other.exprs.len());
+        for proj_expr in &other.exprs {
+            let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)?
+                .ok_or_else(|| {
+                    internal_datafusion_err!(
+                        "Failed to combine projections: expression {} could 
not be updated",
+                        proj_expr.expr

Review Comment:
   The error message should provide more context about why the expression 
couldn't be updated. Consider including information about the projection being 
merged or the specific column indices involved to aid debugging.
   ```suggestion
                           "Failed to combine projections: expression {} (index 
{}) could not be updated when merging projections.\n\
                            Current projection: {:?}\n\
                            Other projection: {:?}",
                           proj_expr.expr,
                           new_exprs.len(),
                           self,
                           other
   ```



##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -224,6 +206,18 @@ impl ProjectionExpr {
     pub fn new(expr: Arc<dyn PhysicalExpr>, alias: String) -> Self {
         Self { expr, alias }
     }
+
+    /// Create a new projection expression from an expression and a schema 
using the expression' output field name as alias.

Review Comment:
   Corrected spelling of 'expression'' to 'expression's'.
   ```suggestion
       /// Create a new projection expression from an expression and a schema 
using the expression's output field name as alias.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to