neilconway commented on code in PR #22718:
URL: https://github.com/apache/datafusion/pull/22718#discussion_r3345630536
##########
datafusion/physical-plan/src/filter.rs:
##########
@@ -861,15 +894,56 @@ fn interval_bound_to_precision(
}
}
-/// This function ensures that all bounds in the `ExprBoundaries` vector are
-/// converted to closed bounds. If a lower/upper bound is initially open, it
-/// is adjusted by using the next/previous value for its data type to convert
-/// it into a closed bound.
+/// Scales a column's `byte_size` by the estimated filter `selectivity`. An
+/// exact zero is preserved: an empty column stays exactly empty after
+/// filtering.
+fn scale_byte_size(byte_size: Precision<usize>, selectivity: f64) ->
Precision<usize> {
+ match byte_size {
+ Precision::Exact(0) => Precision::Exact(0),
+ byte_size => byte_size.with_estimated_selectivity(selectivity),
+ }
+}
+
+/// Caps a row-bounded column statistic (a null count or distinct count) at the
+/// filtered row estimate, since a column cannot have more nulls or distinct
+/// values than it has rows. Known counts are demoted to inexact because the
+/// filtered row count is itself an estimate.
+fn cap_at_rows(
+ value: Precision<usize>,
+ filtered_num_rows: Precision<usize>,
+) -> Precision<usize> {
+ match filtered_num_rows {
+ Precision::Absent => value.to_inexact(),
+ rows => value.to_inexact().min(&rows),
+ }
+}
+
+/// Returns the NDV for a column constrained to one non-null value, such as
+/// `column = literal` or a singleton interval. The constraint gives NDV at
+/// most one; a zero-row output has no distinct values.
+///
+/// The caller is responsible for proving the singleton domain.
+fn distinct_count_for_singleton_domain(
+ filtered_num_rows: Precision<usize>,
+) -> Precision<usize> {
+ match filtered_num_rows {
+ Precision::Exact(0) | Precision::Inexact(0) => filtered_num_rows,
+ _ => Precision::Exact(1),
Review Comment:
Makes sense, done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]