Copilot commented on code in PR #18176:
URL: https://github.com/apache/datafusion/pull/18176#discussion_r2445332864
##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -232,6 +226,177 @@ impl From<(Arc<dyn PhysicalExpr>, String)> for
ProjectionExpr {
}
}
+impl From<&(Arc<dyn PhysicalExpr>, String)> for ProjectionExpr {
+ fn from(value: &(Arc<dyn PhysicalExpr>, String)) -> Self {
+ Self::new(Arc::clone(&value.0), value.1.clone())
+ }
+}
+
+impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
+ fn from(value: ProjectionExpr) -> Self {
+ (value.expr, value.alias)
+ }
+}
+
+/// A collection of projection expressions.
+///
+/// This struct encapsulates multiple `ProjectionExpr` instances,
+/// representing a complete projection operation and provides
+/// methods to manipulate and analyze the projection as a whole.
+#[derive(Debug, Clone)]
+pub struct Projection {
+ exprs: Vec<ProjectionExpr>,
+}
+
+impl From<Vec<ProjectionExpr>> for Projection {
+ fn from(value: Vec<ProjectionExpr>) -> Self {
+ Self { exprs: value }
+ }
+}
+
+impl From<&[ProjectionExpr]> for Projection {
+ fn from(value: &[ProjectionExpr]) -> Self {
+ Self {
+ exprs: value.to_vec(),
+ }
+ }
+}
+
+impl AsRef<[ProjectionExpr]> for Projection {
+ fn as_ref(&self) -> &[ProjectionExpr] {
+ &self.exprs
+ }
+}
+
+impl Projection {
+ pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
+ Self { exprs }
+ }
+
+ /// Apply another projection on top of this projection, returning the
combined projection.
+ /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as
z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`,
+ /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0
as c2`.
+ pub fn try_merge(&self, other: &Projection) -> Result<Projection> {
+ let mut new_exprs = Vec::with_capacity(other.exprs.len());
+ for proj_expr in &other.exprs {
+ let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)?
+ .ok_or_else(|| {
+ internal_datafusion_err!(
+ "Failed to combine projections: expression {} could
not be updated",
+ proj_expr.expr
+ )
+ })?;
+ new_exprs.push(ProjectionExpr {
+ expr: new_expr,
+ alias: proj_expr.alias.clone(),
+ });
+ }
+ Ok(Projection::new(new_exprs))
+ }
+
+ /// Merge an iterator of projections into a single projection.
+ /// For example, if the projections are:
+ /// 1. `SELECT c@2 AS x, b@1 AS y, a@0 as z`
+ /// 2. `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`
+ /// 3. `SELECT c1@0 * 2 AS final_c1`
+ /// we return a projection equivalent to `SELECT (c@2 + 1) * 2 AS
final_c1`.
+ pub fn try_merge_iter<I>(projections: I) -> Result<Projection>
+ where
+ I: IntoIterator<Item = Projection>,
+ {
+ let mut iter = projections.into_iter();
+ let first = iter
+ .next()
+ .ok_or_else(|| internal_datafusion_err!("No projections to
merge"))?;
+ iter.try_fold(first, |acc, proj| acc.try_merge(&proj))
+ }
+
+ /// Extract the column indices used in this projection.
+ /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a`
is at index 0 and `b` is at index 1,
+ /// this function would return `[0, 1]`.
+ /// Repeated indices are returned only once, and the order is ascending.
+ pub fn column_indices(&self) -> Vec<usize> {
+ self.exprs
+ .iter()
+ .map(|e| collect_columns(&e.expr).into_iter().map(|col|
col.index()))
+ .flatten()
Review Comment:
Replace `.flatten()` with `.flat_map()` for better performance and
readability. The current two-step approach of `.map().flatten()` can be
combined into a single `.flat_map()` operation.
```suggestion
.flat_map(|e| collect_columns(&e.expr).into_iter().map(|col|
col.index()))
```
##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -232,6 +226,177 @@ impl From<(Arc<dyn PhysicalExpr>, String)> for
ProjectionExpr {
}
}
+impl From<&(Arc<dyn PhysicalExpr>, String)> for ProjectionExpr {
+ fn from(value: &(Arc<dyn PhysicalExpr>, String)) -> Self {
+ Self::new(Arc::clone(&value.0), value.1.clone())
+ }
+}
+
+impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
+ fn from(value: ProjectionExpr) -> Self {
+ (value.expr, value.alias)
+ }
+}
+
+/// A collection of projection expressions.
+///
+/// This struct encapsulates multiple `ProjectionExpr` instances,
+/// representing a complete projection operation and provides
+/// methods to manipulate and analyze the projection as a whole.
+#[derive(Debug, Clone)]
+pub struct Projection {
+ exprs: Vec<ProjectionExpr>,
+}
+
+impl From<Vec<ProjectionExpr>> for Projection {
+ fn from(value: Vec<ProjectionExpr>) -> Self {
+ Self { exprs: value }
+ }
+}
+
+impl From<&[ProjectionExpr]> for Projection {
+ fn from(value: &[ProjectionExpr]) -> Self {
+ Self {
+ exprs: value.to_vec(),
+ }
+ }
+}
+
+impl AsRef<[ProjectionExpr]> for Projection {
+ fn as_ref(&self) -> &[ProjectionExpr] {
+ &self.exprs
+ }
+}
+
+impl Projection {
+ pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
+ Self { exprs }
+ }
+
+ /// Apply another projection on top of this projection, returning the
combined projection.
+ /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as
z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`,
+ /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0
as c2`.
+ pub fn try_merge(&self, other: &Projection) -> Result<Projection> {
+ let mut new_exprs = Vec::with_capacity(other.exprs.len());
+ for proj_expr in &other.exprs {
+ let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)?
+ .ok_or_else(|| {
+ internal_datafusion_err!(
+ "Failed to combine projections: expression {} could
not be updated",
+ proj_expr.expr
+ )
+ })?;
+ new_exprs.push(ProjectionExpr {
+ expr: new_expr,
+ alias: proj_expr.alias.clone(),
+ });
+ }
+ Ok(Projection::new(new_exprs))
+ }
+
+ /// Merge an iterator of projections into a single projection.
+ /// For example, if the projections are:
+ /// 1. `SELECT c@2 AS x, b@1 AS y, a@0 as z`
+ /// 2. `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`
+ /// 3. `SELECT c1@0 * 2 AS final_c1`
+ /// we return a projection equivalent to `SELECT (c@2 + 1) * 2 AS
final_c1`.
+ pub fn try_merge_iter<I>(projections: I) -> Result<Projection>
+ where
+ I: IntoIterator<Item = Projection>,
+ {
+ let mut iter = projections.into_iter();
+ let first = iter
+ .next()
+ .ok_or_else(|| internal_datafusion_err!("No projections to
merge"))?;
+ iter.try_fold(first, |acc, proj| acc.try_merge(&proj))
+ }
+
+ /// Extract the column indices used in this projection.
+ /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a`
is at index 0 and `b` is at index 1,
+ /// this function would return `[0, 1]`.
+ /// Repeated indices are returned only once, and the order is ascending.
+ pub fn column_indices(&self) -> Vec<usize> {
+ self.exprs
+ .iter()
+ .map(|e| collect_columns(&e.expr).into_iter().map(|col|
col.index()))
+ .flatten()
+ .sorted_unstable()
+ .dedup()
+ .collect_vec()
Review Comment:
The combination of `.sorted_unstable().dedup()` is inefficient. Consider
using `.unique()` from itertools (already imported) which handles both
deduplication and sorting in a single pass, or use a `HashSet` to collect
unique indices and then sort once.
```suggestion
use std::collections::HashSet;
let mut indices: HashSet<usize> = self.exprs
.iter()
.flat_map(|e| collect_columns(&e.expr).into_iter().map(|col|
col.index()))
.collect();
let mut indices_vec: Vec<usize> = indices.into_iter().collect();
indices_vec.sort_unstable();
indices_vec
```
##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -232,6 +226,177 @@ impl From<(Arc<dyn PhysicalExpr>, String)> for
ProjectionExpr {
}
}
+impl From<&(Arc<dyn PhysicalExpr>, String)> for ProjectionExpr {
+ fn from(value: &(Arc<dyn PhysicalExpr>, String)) -> Self {
+ Self::new(Arc::clone(&value.0), value.1.clone())
+ }
+}
+
+impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
+ fn from(value: ProjectionExpr) -> Self {
+ (value.expr, value.alias)
+ }
+}
+
+/// A collection of projection expressions.
+///
+/// This struct encapsulates multiple `ProjectionExpr` instances,
+/// representing a complete projection operation and provides
+/// methods to manipulate and analyze the projection as a whole.
+#[derive(Debug, Clone)]
+pub struct Projection {
+ exprs: Vec<ProjectionExpr>,
+}
+
+impl From<Vec<ProjectionExpr>> for Projection {
+ fn from(value: Vec<ProjectionExpr>) -> Self {
+ Self { exprs: value }
+ }
+}
+
+impl From<&[ProjectionExpr]> for Projection {
+ fn from(value: &[ProjectionExpr]) -> Self {
+ Self {
+ exprs: value.to_vec(),
+ }
+ }
+}
+
+impl AsRef<[ProjectionExpr]> for Projection {
+ fn as_ref(&self) -> &[ProjectionExpr] {
+ &self.exprs
+ }
+}
+
+impl Projection {
+ pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
+ Self { exprs }
+ }
+
+ /// Apply another projection on top of this projection, returning the
combined projection.
+ /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as
z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`,
+ /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0
as c2`.
+ pub fn try_merge(&self, other: &Projection) -> Result<Projection> {
+ let mut new_exprs = Vec::with_capacity(other.exprs.len());
+ for proj_expr in &other.exprs {
+ let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)?
+ .ok_or_else(|| {
+ internal_datafusion_err!(
+ "Failed to combine projections: expression {} could
not be updated",
+ proj_expr.expr
Review Comment:
The error message should provide more context about why the expression
couldn't be updated. Consider including information about the projection being
merged or the specific column indices involved to aid debugging.
```suggestion
"Failed to combine projections: expression {} (index
{}) could not be updated when merging projections.\n\
Current projection: {:?}\n\
Other projection: {:?}",
proj_expr.expr,
new_exprs.len(),
self,
other
```
##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -224,6 +206,18 @@ impl ProjectionExpr {
pub fn new(expr: Arc<dyn PhysicalExpr>, alias: String) -> Self {
Self { expr, alias }
}
+
+ /// Create a new projection expression from an expression and a schema
using the expression' output field name as alias.
Review Comment:
Corrected spelling of 'expression'' to 'expression's'.
```suggestion
/// Create a new projection expression from an expression and a schema
using the expression's output field name as alias.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]