adriangb commented on code in PR #18176:
URL: https://github.com/apache/datafusion/pull/18176#discussion_r2448472659


##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -1459,4 +1633,608 @@ mod tests {
         );
         assert!(stats.total_byte_size.is_exact().unwrap_or(false));
     }
+
+    // Tests for Projection struct
+
+    #[test]
+    fn test_projection_new() -> Result<()> {
+        let exprs = vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                alias: "a".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                alias: "b".to_string(),
+            },
+        ];
+        let projection = Projection::new(exprs.clone());
+        assert_eq!(projection.as_ref().len(), 2);
+        Ok(())
+    }
+
+    #[test]
+    fn test_projection_from_vec() -> Result<()> {
+        let exprs = vec![ProjectionExpr {
+            expr: Arc::new(Column::new("x", 0)),
+            alias: "x".to_string(),
+        }];
+        let projection: Projection = exprs.clone().into();
+        assert_eq!(projection.as_ref().len(), 1);
+        Ok(())
+    }
+
+    #[test]
+    fn test_projection_as_ref() -> Result<()> {
+        let exprs = vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("col1", 0)),
+                alias: "col1".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("col2", 1)),
+                alias: "col2".to_string(),
+            },
+        ];
+        let projection = Projection::new(exprs);
+        let as_ref: &[ProjectionExpr] = projection.as_ref();
+        assert_eq!(as_ref.len(), 2);
+        Ok(())
+    }
+
+    #[test]
+    fn test_column_indices_single_column() -> Result<()> {
+        let projection = Projection::new(vec![ProjectionExpr {
+            expr: Arc::new(Column::new("a", 3)),
+            alias: "a".to_string(),
+        }]);
+        assert_eq!(projection.column_indices(), vec![3]);
+        Ok(())
+    }
+
+    #[test]
+    fn test_column_indices_multiple_columns() -> Result<()> {
+        let projection = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                alias: "a".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 2)),
+                alias: "b".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("c", 5)),
+                alias: "c".to_string(),
+            },
+        ]);
+        assert_eq!(projection.column_indices(), vec![0, 2, 5]);
+        Ok(())
+    }
+
+    #[test]
+    fn test_column_indices_duplicates() -> Result<()> {
+        // Test that duplicate column indices appear only once
+        let projection = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 1)),
+                alias: "a".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 3)),
+                alias: "b".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a2", 1)), // duplicate index
+                alias: "a2".to_string(),
+            },
+        ]);
+        assert_eq!(projection.column_indices(), vec![1, 3]);
+        Ok(())
+    }
+
+    #[test]
+    fn test_column_indices_unsorted() -> Result<()> {
+        // Test that column indices are sorted in the output
+        let projection = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("c", 5)),
+                alias: "c".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 1)),
+                alias: "a".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 3)),
+                alias: "b".to_string(),
+            },
+        ]);
+        assert_eq!(projection.column_indices(), vec![1, 3, 5]);
+        Ok(())
+    }
+
+    #[test]
+    fn test_column_indices_complex_expr() -> Result<()> {
+        // Test with complex expressions containing multiple columns
+        let expr = Arc::new(BinaryExpr::new(
+            Arc::new(Column::new("a", 1)),
+            Operator::Plus,
+            Arc::new(Column::new("b", 4)),
+        ));
+        let projection = Projection::new(vec![
+            ProjectionExpr {
+                expr,
+                alias: "sum".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("c", 2)),
+                alias: "c".to_string(),
+            },
+        ]);
+        // Should return [1, 2, 4] - all columns used, sorted and deduplicated
+        assert_eq!(projection.column_indices(), vec![1, 2, 4]);
+        Ok(())
+    }
+
+    #[test]
+    fn test_column_indices_empty() -> Result<()> {
+        let projection = Projection::new(vec![]);
+        assert_eq!(projection.column_indices(), Vec::<usize>::new());
+        Ok(())
+    }
+
+    #[test]
+    fn test_merge_simple_columns() -> Result<()> {
+        // First projection: SELECT c@2 AS x, b@1 AS y, a@0 AS z
+        let base_projection = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("c", 2)),
+                alias: "x".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                alias: "y".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                alias: "z".to_string(),
+            },
+        ]);
+
+        // Second projection: SELECT x@0 AS col1, y@1 AS col2
+        let top_projection = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("x", 0)),
+                alias: "col1".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("y", 1)),
+                alias: "col2".to_string(),
+            },
+        ]);
+
+        // Merge should produce: SELECT c@2 AS col1, b@1 AS col2
+        let merged = base_projection.try_merge(&top_projection)?;
+        assert_eq!(merged.as_ref().len(), 2);
+
+        // Check first expression (col1 should reference c@2)
+        let col1_expr = merged.as_ref()[0].expr.as_any().downcast_ref::<Column>();
+        assert!(col1_expr.is_some());
+        let col1 = col1_expr.unwrap();
+        assert_eq!(col1.name(), "c");
+        assert_eq!(col1.index(), 2);
+        assert_eq!(merged.as_ref()[0].alias, "col1");
+
+        // Check second expression (col2 should reference b@1)
+        let col2_expr = merged.as_ref()[1].expr.as_any().downcast_ref::<Column>();
+        assert!(col2_expr.is_some());
+        let col2 = col2_expr.unwrap();
+        assert_eq!(col2.name(), "b");
+        assert_eq!(col2.index(), 1);
+        assert_eq!(merged.as_ref()[1].alias, "col2");
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_merge_with_expressions() -> Result<()> {
+        // First projection: SELECT c@2 AS x, b@1 AS y, a@0 AS z
+        let base_projection = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("c", 2)),
+                alias: "x".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                alias: "y".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                alias: "z".to_string(),
+            },
+        ]);
+
+        // Second projection: SELECT x@0 + 1 AS c1, y@1 + z@2 AS c2
+        let top_projection = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(BinaryExpr::new(
+                    Arc::new(Column::new("x", 0)),
+                    Operator::Plus,
+                    Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
+                )),
+                alias: "c1".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(BinaryExpr::new(
+                    Arc::new(Column::new("y", 1)),
+                    Operator::Plus,
+                    Arc::new(Column::new("z", 2)),
+                )),
+                alias: "c2".to_string(),
+            },
+        ]);
+
+        // Merge should produce: SELECT c@2 + 1 AS c1, b@1 + a@0 AS c2
+        let merged = base_projection.try_merge(&top_projection)?;
+        assert_eq!(merged.as_ref().len(), 2);
+        assert_eq!(merged.as_ref()[0].alias, "c1");
+        assert_eq!(merged.as_ref()[1].alias, "c2");
+
+        // Check that the expressions are BinaryExpr (not just Column)
+        assert!(merged.as_ref()[0].expr.as_any().is::<BinaryExpr>());
+        assert!(merged.as_ref()[1].expr.as_any().is::<BinaryExpr>());
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_merge_docstring_example() -> Result<()> {
+        // Example from the docstring:
+        // Base projection: SELECT c@2 AS x, b@1 AS y, a@0 AS z
+        let base = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("c", 2)),
+                alias: "x".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                alias: "y".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                alias: "z".to_string(),
+            },
+        ]);
+
+        // Top projection: SELECT x@0 + 1 AS c1, y@1 + z@2 AS c2
+        let top = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(BinaryExpr::new(
+                    Arc::new(Column::new("x", 0)),
+                    Operator::Plus,
+                    Arc::new(Literal::new(ScalarValue::Int32(Some(1)))),
+                )),
+                alias: "c1".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(BinaryExpr::new(
+                    Arc::new(Column::new("y", 1)),
+                    Operator::Plus,
+                    Arc::new(Column::new("z", 2)),
+                )),
+                alias: "c2".to_string(),
+            },
+        ]);
+
+        // Expected result: SELECT c@2 + 1 AS c1, b@1 + a@0 AS c2
+        let result = base.try_merge(&top)?;
+
+        assert_eq!(result.as_ref().len(), 2);
+        assert_eq!(result.as_ref()[0].alias, "c1");
+        assert_eq!(result.as_ref()[1].alias, "c2");
+
+        Ok(())
+    }
+
+    #[test]
+    fn try_merge_error() {
+        // Create a base projection
+        let base = Projection::new(vec![
+            ProjectionExpr {
+                expr: Arc::new(Column::new("a", 0)),
+                alias: "x".to_string(),
+            },
+            ProjectionExpr {
+                expr: Arc::new(Column::new("b", 1)),
+                alias: "y".to_string(),
+            },
+        ]);
+
+        // Create a top projection that references a non-existent column index
+        let top = Projection::new(vec![ProjectionExpr {
+            expr: Arc::new(Column::new("z", 5)), // Invalid index
+            alias: "result".to_string(),
+        }]);
+
+        // Attempt to merge and expect an error
+        let result = base.try_merge(&top);
+        assert!(result.is_err());
+
+        let err_msg = result.err().unwrap().to_string();

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to