xudong963 commented on code in PR #21430:
URL: https://github.com/apache/datafusion/pull/21430#discussion_r3043417359
##########
datafusion/physical-plan/src/union.rs:
##########
@@ -959,294 +903,113 @@ mod tests {
Ok(())
}
- #[tokio::test]
- async fn test_stats_union() {
- let left = Statistics {
- num_rows: Precision::Exact(5),
- total_byte_size: Precision::Exact(23),
- column_statistics: vec![
- ColumnStatistics {
- distinct_count: Precision::Exact(5),
- max_value: Precision::Exact(ScalarValue::Int64(Some(21))),
- min_value: Precision::Exact(ScalarValue::Int64(Some(-4))),
- sum_value: Precision::Exact(ScalarValue::Int64(Some(42))),
- null_count: Precision::Exact(0),
- byte_size: Precision::Absent,
- },
- ColumnStatistics {
- distinct_count: Precision::Exact(1),
- max_value: Precision::Exact(ScalarValue::from("x")),
- min_value: Precision::Exact(ScalarValue::from("a")),
- sum_value: Precision::Absent,
- null_count: Precision::Exact(3),
- byte_size: Precision::Absent,
- },
- ColumnStatistics {
- distinct_count: Precision::Absent,
- max_value:
Precision::Exact(ScalarValue::Float32(Some(1.1))),
- min_value:
Precision::Exact(ScalarValue::Float32(Some(0.1))),
- sum_value:
Precision::Exact(ScalarValue::Float32(Some(42.0))),
- null_count: Precision::Absent,
- byte_size: Precision::Absent,
- },
- ],
- };
+ fn stats_merge_inputs() -> (SchemaRef, Statistics, Statistics, Statistics)
{
+ let schema = Arc::new(Schema::new(vec![Field::new("a",
DataType::UInt32, true)]));
+
+ let left = Statistics::default()
+ .with_num_rows(Precision::Exact(5))
+ .with_total_byte_size(Precision::Exact(23))
+ .add_column_statistics(
+ ColumnStatistics::new_unknown()
+ .with_distinct_count(Precision::Exact(5))
+
.with_min_value(Precision::Exact(ScalarValue::UInt32(Some(1))))
+
.with_max_value(Precision::Exact(ScalarValue::UInt32(Some(21))))
+
.with_sum_value(Precision::Exact(ScalarValue::UInt32(Some(42))))
+ .with_null_count(Precision::Exact(0))
+ .with_byte_size(Precision::Exact(40)),
+ );
- let right = Statistics {
- num_rows: Precision::Exact(7),
- total_byte_size: Precision::Exact(29),
- column_statistics: vec![
- ColumnStatistics {
- distinct_count: Precision::Exact(3),
- max_value: Precision::Exact(ScalarValue::Int64(Some(34))),
- min_value: Precision::Exact(ScalarValue::Int64(Some(1))),
- sum_value: Precision::Exact(ScalarValue::Int64(Some(42))),
- null_count: Precision::Exact(1),
- byte_size: Precision::Absent,
- },
- ColumnStatistics {
- distinct_count: Precision::Absent,
- max_value: Precision::Exact(ScalarValue::from("c")),
- min_value: Precision::Exact(ScalarValue::from("b")),
- sum_value: Precision::Absent,
- null_count: Precision::Absent,
- byte_size: Precision::Absent,
- },
- ColumnStatistics {
- distinct_count: Precision::Absent,
- max_value: Precision::Absent,
- min_value: Precision::Absent,
- sum_value: Precision::Absent,
- null_count: Precision::Absent,
- byte_size: Precision::Absent,
- },
- ],
- };
+ let right = Statistics::default()
+ .with_num_rows(Precision::Exact(7))
+ .with_total_byte_size(Precision::Exact(29))
+ .add_column_statistics(
+ ColumnStatistics::new_unknown()
+ .with_distinct_count(Precision::Exact(3))
+
.with_min_value(Precision::Exact(ScalarValue::UInt32(Some(22))))
+
.with_max_value(Precision::Exact(ScalarValue::UInt32(Some(34))))
+
.with_sum_value(Precision::Exact(ScalarValue::UInt32(Some(8))))
+ .with_null_count(Precision::Exact(1))
+ .with_byte_size(Precision::Exact(60)),
+ );
- let result = stats_union(left, right);
- let expected = Statistics {
- num_rows: Precision::Exact(12),
- total_byte_size: Precision::Exact(52),
- column_statistics: vec![
- ColumnStatistics {
- distinct_count: Precision::Inexact(6),
- max_value: Precision::Exact(ScalarValue::Int64(Some(34))),
- min_value: Precision::Exact(ScalarValue::Int64(Some(-4))),
- sum_value: Precision::Exact(ScalarValue::Int64(Some(84))),
- null_count: Precision::Exact(1),
- byte_size: Precision::Absent,
- },
- ColumnStatistics {
- distinct_count: Precision::Absent,
- max_value: Precision::Exact(ScalarValue::from("x")),
- min_value: Precision::Exact(ScalarValue::from("a")),
- sum_value: Precision::Absent,
- null_count: Precision::Absent,
- byte_size: Precision::Absent,
- },
- ColumnStatistics {
- distinct_count: Precision::Absent,
- max_value: Precision::Absent,
- min_value: Precision::Absent,
- sum_value: Precision::Absent,
- null_count: Precision::Absent,
- byte_size: Precision::Absent,
- },
- ],
- };
+ let expected = Statistics::default()
+ .with_num_rows(Precision::Exact(12))
+ .with_total_byte_size(Precision::Exact(52))
+ .add_column_statistics(
+ ColumnStatistics::new_unknown()
+ .with_distinct_count(Precision::Inexact(8))
+
.with_min_value(Precision::Exact(ScalarValue::UInt32(Some(1))))
+
.with_max_value(Precision::Exact(ScalarValue::UInt32(Some(34))))
+
.with_sum_value(Precision::Exact(ScalarValue::UInt64(Some(50))))
+ .with_null_count(Precision::Exact(1))
+ .with_byte_size(Precision::Exact(100)),
+ );
- assert_eq!(result, expected);
+ (schema, left, right, expected)
}
#[test]
- fn test_union_distinct_count() {
Review Comment:
nice, I converted the PR to draft now
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]