alamb commented on code in PR #17123:
URL: https://github.com/apache/datafusion/pull/17123#discussion_r2270601725
##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -1230,4 +1232,83 @@ mod tests {
assert_eq!(result, expected);
}
+
+
+ #[test]
+ fn test_projection_statistics_uses_input_schema() {
+ let input_schema = Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ Field::new("c", DataType::Int32, false),
+ Field::new("d", DataType::Int32, false),
+ Field::new("e", DataType::Int32, false),
+ Field::new("f", DataType::Int32, false),
+ ]);
+
+ let input_statistics = Statistics {
+ num_rows: Precision::Exact(10),
+ column_statistics: vec![
+ ColumnStatistics {
+ min_value: Precision::Exact(ScalarValue::Int32(Some(1))),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
+ ..Default::default()
+ },
+ ColumnStatistics {
+ min_value: Precision::Exact(ScalarValue::Int32(Some(5))),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(50))),
+ ..Default::default()
+ },
+ ColumnStatistics {
+ min_value: Precision::Exact(ScalarValue::Int32(Some(10))),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(40))),
+ ..Default::default()
+ },
+ ColumnStatistics {
+ min_value: Precision::Exact(ScalarValue::Int32(Some(20))),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(30))),
+ ..Default::default()
+ },
+ ColumnStatistics {
+ min_value: Precision::Exact(ScalarValue::Int32(Some(21))),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(29))),
+ ..Default::default()
+ },
+ ColumnStatistics {
+ min_value: Precision::Exact(ScalarValue::Int32(Some(24))),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(26))),
+ ..Default::default()
+ },
+ ],
+ ..Default::default()
+ };
+
+ let input = Arc::new(StatisticsExec::new(input_statistics,
input_schema));
+
+ // Create projection expressions that reference columns from the input
+ // schema. The output schema has fewer columns than the input schema, so
+ // referencing the last few input columns in the expressions here would
+ // make bounds_check fail on them if the output schema were supplied to
+ // the partition_statistics method.
+ let exprs: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![
+ (
+ Arc::new(Column::new("c", 2)) as Arc<dyn PhysicalExpr>,
+ "c_renamed".to_string(),
+ ),
+ (
+ Arc::new(BinaryExpr::new(
+ Arc::new(Column::new("e", 4)),
+ Operator::Plus,
+ Arc::new(Column::new("f", 5)),
+ )) as Arc<dyn PhysicalExpr>,
+ "e_plus_f".to_string(),
+ ),
+ ];
+
+ let projection = ProjectionExec::try_new(exprs, input).unwrap();
+
+ let stats = projection.partition_statistics(None).unwrap();
+
+ assert_eq!(stats.num_rows, Precision::Exact(10));
+ assert_eq!(stats.column_statistics.len(), 2, "Expected 2 columns in
projection statistics");
+ assert_eq!(stats.total_byte_size.is_exact().unwrap_or(false), true);
Review Comment:
I verified that this test covers the change: running it without the code
change fails like this:
```
assertion `left == right` failed
left: false
right: true
Left: false
Right: true
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]