This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 09280f8e8c3 [improvement](statistics)Remove useless stats validation check. (#43279) 09280f8e8c3 is described below commit 09280f8e8c365ffa638d7be51bf71e834f7ce265 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Fri Nov 8 14:57:58 2024 +0800 [improvement](statistics)Remove useless stats validation check. (#43279) Remove useless stats validation check. Previously, we did not load the column stats when the table rowCount > 0, ndv == 0, and nullCount != rowCount. This was to avoid using invalid stats. Now we remove this validation because the planner side has added its own validation of the column stats (see https://github.com/apache/doris/pull/41790). Besides, after removing the validation, it is easier to add regression tests using stats injection. --- .../src/main/java/org/apache/doris/statistics/ColStatsData.java | 5 ----- .../org/apache/doris/statistics/ColumnStatisticsCacheLoader.java | 8 -------- .../main/java/org/apache/doris/statistics/StatisticsCache.java | 5 +---- regression-test/suites/statistics/analyze_stats.groovy | 4 +--- 4 files changed, 2 insertions(+), 20 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java index ed0b2effdb1..beee7f2da8c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java @@ -131,11 +131,6 @@ public class ColStatsData { } public ColumnStatistic toColumnStatistic() { - // For non-empty table, return UNKNOWN if we can't collect ndv value. - // Because inaccurate ndv is very misleading. 
- if (count > 0 && ndv == 0 && count != nullCount) { - return ColumnStatistic.UNKNOWN; - } try { ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(count); columnStatisticBuilder.setNdv(ndv); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java index ec98ee5af15..692d723ed0a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java @@ -49,14 +49,6 @@ public class ColumnStatisticsCacheLoader extends BasicAsyncCacheLoader<Statistic } return null; } - if (columnStatistic.isPresent()) { - // For non-empty table, return UNKNOWN if we can't collect ndv value. - // Because inaccurate ndv is very misleading. - ColumnStatistic stats = columnStatistic.get(); - if (stats.count > 0 && stats.ndv == 0 && stats.count != stats.numNulls) { - columnStatistic = Optional.of(ColumnStatistic.UNKNOWN); - } - } return columnStatistic; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index a922e8619a4..88d67f0d447 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -250,10 +250,7 @@ public class StatisticsCache { final StatisticsCacheKey k = new StatisticsCacheKey(statsId.catalogId, statsId.dbId, statsId.tblId, statsId.idxId, statsId.colId); - ColumnStatistic c = ColumnStatistic.fromResultRow(r); - if (c.count > 0 && c.ndv == 0 && c.count != c.numNulls) { - c = ColumnStatistic.UNKNOWN; - } + final ColumnStatistic c = ColumnStatistic.fromResultRow(r); putCache(k, c); } catch (Throwable t) { LOG.warn("Error when preheating stats cache. reason: [{}]. 
Row:[{}]", t.getMessage(), r); diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index fcdb66fcbab..55074e995fe 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -2749,9 +2749,7 @@ PARTITION `p599` VALUES IN (599) logger.info("show column alter_test(id) stats: " + alter_result) assertEquals(1, alter_result.size()) alter_result = sql """show column cached stats alter_test(id)""" - assertEquals(0, alter_result.size()) - alter_result = sql """show column cached stats alter_test(id)""" - assertEquals(0, alter_result.size()) + assertEquals(1, alter_result.size()) sql """alter table alter_test modify column id set stats ('row_count'='100', 'ndv'='0', 'num_nulls'='100', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" alter_result = sql """show column stats alter_test(id)""" logger.info("show column alter_test(id) stats: " + alter_result) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org