This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 09280f8e8c3 [improvement](statistics)Remove useless stats validation 
check. (#43279)
09280f8e8c3 is described below

commit 09280f8e8c365ffa638d7be51bf71e834f7ce265
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Fri Nov 8 14:57:58 2024 +0800

    [improvement](statistics)Remove useless stats validation check. (#43279)
    
    Remove useless stats validation check.
    Previously, we did not load the column stats when table rowCount > 0,
    ndv == 0 and nullCount != rowCount. This was to avoid using invalid
    stats.
    Now we remove this validation because the planner side has added its own
    validation of the column stats (see https://github.com/apache/doris/pull/41790).
    Besides, with the validation removed, it is easier to add regression
    tests using stats injection.
---
 .../src/main/java/org/apache/doris/statistics/ColStatsData.java   | 5 -----
 .../org/apache/doris/statistics/ColumnStatisticsCacheLoader.java  | 8 --------
 .../main/java/org/apache/doris/statistics/StatisticsCache.java    | 5 +----
 regression-test/suites/statistics/analyze_stats.groovy            | 4 +---
 4 files changed, 2 insertions(+), 20 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
index ed0b2effdb1..beee7f2da8c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java
@@ -131,11 +131,6 @@ public class ColStatsData {
     }
 
     public ColumnStatistic toColumnStatistic() {
-        // For non-empty table, return UNKNOWN if we can't collect ndv value.
-        // Because inaccurate ndv is very misleading.
-        if (count > 0 && ndv == 0 && count != nullCount) {
-            return ColumnStatistic.UNKNOWN;
-        }
         try {
             ColumnStatisticBuilder columnStatisticBuilder = new 
ColumnStatisticBuilder(count);
             columnStatisticBuilder.setNdv(ndv);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
index ec98ee5af15..692d723ed0a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticsCacheLoader.java
@@ -49,14 +49,6 @@ public class ColumnStatisticsCacheLoader extends 
BasicAsyncCacheLoader<Statistic
             }
             return null;
         }
-        if (columnStatistic.isPresent()) {
-            // For non-empty table, return UNKNOWN if we can't collect ndv 
value.
-            // Because inaccurate ndv is very misleading.
-            ColumnStatistic stats = columnStatistic.get();
-            if (stats.count > 0 && stats.ndv == 0 && stats.count != 
stats.numNulls) {
-                columnStatistic = Optional.of(ColumnStatistic.UNKNOWN);
-            }
-        }
         return columnStatistic;
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
index a922e8619a4..88d67f0d447 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
@@ -250,10 +250,7 @@ public class StatisticsCache {
                 final StatisticsCacheKey k =
                         new StatisticsCacheKey(statsId.catalogId, 
statsId.dbId, statsId.tblId, statsId.idxId,
                                 statsId.colId);
-                ColumnStatistic c = ColumnStatistic.fromResultRow(r);
-                if (c.count > 0 && c.ndv == 0 && c.count != c.numNulls) {
-                    c = ColumnStatistic.UNKNOWN;
-                }
+                final ColumnStatistic c = ColumnStatistic.fromResultRow(r);
                 putCache(k, c);
             } catch (Throwable t) {
                 LOG.warn("Error when preheating stats cache. reason: [{}]. 
Row:[{}]", t.getMessage(), r);
diff --git a/regression-test/suites/statistics/analyze_stats.groovy 
b/regression-test/suites/statistics/analyze_stats.groovy
index fcdb66fcbab..55074e995fe 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -2749,9 +2749,7 @@ PARTITION `p599` VALUES IN (599)
     logger.info("show column alter_test(id) stats: " + alter_result)
     assertEquals(1, alter_result.size())
     alter_result = sql """show column cached stats alter_test(id)"""
-    assertEquals(0, alter_result.size())
-    alter_result = sql """show column cached stats alter_test(id)"""
-    assertEquals(0, alter_result.size())
+    assertEquals(1, alter_result.size())
     sql """alter table alter_test modify column id set stats 
('row_count'='100', 'ndv'='0', 'num_nulls'='100', 'data_size'='2.69975443E8', 
'min_value'='1', 'max_value'='2');"""
     alter_result = sql """show column stats alter_test(id)"""
     logger.info("show column alter_test(id) stats: " + alter_result)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to