This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1b509ab13c2 [Fix](statistics)Need to recalculate health value when table row count become 0 (#27673) 1b509ab13c2 is described below commit 1b509ab13c2b3be5f09d0b4a8115db7d1b86471a Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Tue Nov 28 18:47:12 2023 +0800 [Fix](statistics)Need to recalculate health value when table row count become 0 (#27673) Need to recalculate the health value when the table row count becomes 0. Otherwise, when a user truncates a table, the old statistics will not be updated. --- .../apache/doris/statistics/AnalysisManager.java | 15 +++++ .../doris/statistics/StatisticsAutoCollector.java | 41 ++++++++++-- .../doris/statistics/util/StatisticsUtil.java | 3 +- .../doris/statistics/AnalysisManagerTest.java | 1 - .../statistics/StatisticsAutoCollectorTest.java | 74 ++++++++++++++++++++++ 5 files changed, 126 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 7dc570ba313..111a711eddf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -724,6 +724,21 @@ public class AnalysisManager implements Writable { StatisticsRepository.dropStatistics(tblId, cols); } + public void dropStats(TableIf table) throws DdlException { + TableStatsMeta tableStats = findTableStatsStatus(table.getId()); + if (tableStats == null) { + return; + } + Set<String> cols = table.getBaseSchema().stream().map(Column::getName).collect(Collectors.toSet()); + for (String col : cols) { + tableStats.removeColumn(col); + Env.getCurrentEnv().getStatisticsCache().invalidate(table.getId(), -1L, col); + } + tableStats.updatedTime = 0; + logCreateTableStats(tableStats); + StatisticsRepository.dropStatistics(table.getId(), cols); + } + public 
void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException { Map<Long, BaseAnalysisTask> analysisTaskMap = analysisJobIdToTaskMap.remove(killAnalysisJobStmt.jobId); if (analysisTaskMap == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 3b1107bac08..7f8dd18d50e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -91,15 +91,21 @@ public class StatisticsAutoCollector extends StatisticsCollector { public void analyzeDb(DatabaseIf<TableIf> databaseIf) throws DdlException { List<AnalysisInfo> analysisInfos = constructAnalysisInfo(databaseIf); for (AnalysisInfo analysisInfo : analysisInfos) { - analysisInfo = getReAnalyzeRequiredPart(analysisInfo); - if (analysisInfo == null) { - continue; - } try { + if (needDropStaleStats(analysisInfo)) { + Env.getCurrentEnv().getAnalysisManager().dropStats(databaseIf.getTable(analysisInfo.tblId).get()); + continue; + } + analysisInfo = getReAnalyzeRequiredPart(analysisInfo); + if (analysisInfo == null) { + continue; + } createSystemAnalysisJob(analysisInfo); } catch (Throwable t) { analysisInfo.message = t.getMessage(); - throw t; + LOG.warn("Failed to auto analyze table {}.{}, reason {}", + databaseIf.getFullName(), analysisInfo.tblId, analysisInfo.message, t); + continue; } } } @@ -191,4 +197,29 @@ public class StatisticsAutoCollector extends StatisticsCollector { return new AnalysisInfoBuilder(jobInfo).setColToPartitions(needRunPartitions).build(); } + /** + * Check if the given table should drop stale stats. User may truncate table, + * in this case, we need to drop the stale stats. + * @param jobInfo + * @return True if you need to drop, false otherwise. 
+ */ + protected boolean needDropStaleStats(AnalysisInfo jobInfo) { + TableIf table = StatisticsUtil + .findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); + if (!(table instanceof OlapTable)) { + return false; + } + AnalysisManager analysisManager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tblStats = analysisManager.findTableStatsStatus(table.getId()); + if (tblStats == null) { + return false; + } + if (tblStats.analyzeColumns().isEmpty()) { + return false; + } + if (table.getRowCount() == 0) { + return true; + } + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 01438ed7084..7d28d008b13 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -522,8 +522,7 @@ public class StatisticsUtil { * * @param updatedRows The number of rows updated by the table * @param totalRows The current number of rows in the table - * the healthier the statistics of the table - * @return Health, the value range is [0, 100], the larger the value, + * @return Health, the value range is [0, 100], the larger the value, the healthier the statistics of the table. 
*/ public static int getTableHealth(long totalRows, long updatedRows) { if (updatedRows >= totalRows) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 35f02b88115..9c459080682 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -409,7 +409,6 @@ public class AnalysisManagerTest { .setColToPartitions(new HashMap<>()).setColName("col1").build(), olapTable); stats2.updatedRows.addAndGet(20); Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2)); - } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index d94bdd61248..14c6f41384f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.Table; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.View; +import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; @@ -418,4 +419,77 @@ public class StatisticsAutoCollectorTest { Assertions.assertNotNull(task.getTableSample()); } } + + @Test + public void testNeedDropStaleStats() { + + TableIf olapTable = new OlapTable(); + TableIf otherTable = new ExternalTable(); + + new MockUp<StatisticsUtil>() { + @Mock + public TableIf findTable(long catalogId, long dbId, long tblId) { + if (tblId == 0) { + return olapTable; + } else { + return otherTable; + } + } + }; + + new MockUp<OlapTable>() { + int count = 0; + + int[] 
rowCounts = {100, 0}; + @Mock + public long getRowCount() { + return rowCounts[count++]; + } + + @Mock + public List<Column> getBaseSchema() { + return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); + } + }; + + AnalysisInfo analysisInfoOlap = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) + .setColToPartitions(new HashMap<>()) + .setAnalysisType(AnalysisType.FUNDAMENTALS) + .setColName("col1") + .setTblId(0) + .setJobType(JobType.SYSTEM).build(); + + new MockUp<AnalysisManager>() { + int count = 0; + + TableStatsMeta[] tableStatsArr = + new TableStatsMeta[] {null, + new TableStatsMeta(0, analysisInfoOlap, olapTable), + new TableStatsMeta(0, analysisInfoOlap, olapTable)}; + + { + tableStatsArr[1].updatedRows.addAndGet(100); + tableStatsArr[2].updatedRows.addAndGet(0); + } + + + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return tableStatsArr[count++]; + } + }; + + AnalysisInfo analysisInfoOtherTable = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) + .setColToPartitions(new HashMap<>()) + .setAnalysisType(AnalysisType.FUNDAMENTALS) + .setColName("col1") + .setTblId(1) + .setJobType(JobType.SYSTEM).build(); + + StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector(); + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOtherTable)); + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); + Assertions.assertTrue(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org