This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 75b019e8160 [opt](statistics) create or update table stats after alter column stats #29254 (#29721) 75b019e8160 is described below commit 75b019e816055d7579199507801e58ece0168794 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Tue Jan 9 22:13:14 2024 +0800 [opt](statistics) create or update table stats after alter column stats #29254 (#29721) Create or update table stats after alter column stats. Set flag to disable auto analyze for the table after user inject column stats. --- .../apache/doris/analysis/ShowTableStatsStmt.java | 2 ++ .../org/apache/doris/statistics/AnalysisInfo.java | 5 +++- .../doris/statistics/AnalysisInfoBuilder.java | 9 ++++++- .../apache/doris/statistics/AnalysisManager.java | 12 +++++++++ .../doris/statistics/StatisticsAutoCollector.java | 3 +++ .../doris/statistics/StatisticsRepository.java | 8 ++++++ .../apache/doris/statistics/TableStatsMeta.java | 7 +++++- .../statistics/StatisticsAutoCollectorTest.java | 9 +++++++ .../suites/statistics/analyze_stats.groovy | 29 ++++++++++++++++++++++ 9 files changed, 81 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 284b6248b85..95d36867da2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -55,6 +55,7 @@ public class ShowTableStatsStmt extends ShowStmt { .add("columns") .add("trigger") .add("new_partition") + .add("user_inject") .build(); private final TableName tableName; @@ -151,6 +152,7 @@ public class ShowTableStatsStmt extends ShowStmt { row.add(tableStatistic.analyzeColumns().toString()); row.add(tableStatistic.jobType.toString()); row.add(String.valueOf(tableStatistic.newPartitionLoaded.get())); + row.add(String.valueOf(tableStatistic.userInjected)); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index aaff9e59927..97788174e69 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -198,6 +198,8 @@ public class AnalysisInfo implements Writable { */ public final long tblUpdateTime; + public final boolean userInject; + public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId, Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, @@ -205,7 +207,7 @@ public class AnalysisInfo implements Writable { long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType, boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition, boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull, - boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob) { + boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject) { this.jobId = jobId; this.taskId = taskId; this.taskIds = taskIds; @@ -242,6 +244,7 @@ public class AnalysisInfo implements Writable { this.usingSqlForPartitionColumn = usingSqlForPartitionColumn; this.tblUpdateTime = tblUpdateTime; this.emptyJob = emptyJob; + this.userInject = userInject; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 310b7816ecd..22f3d22b3ce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -63,6 +63,7 @@ public class AnalysisInfoBuilder { private boolean usingSqlForPartitionColumn; private long tblUpdateTime; private boolean emptyJob; + private boolean userInject; public AnalysisInfoBuilder() { } @@ -101,6 +102,7 @@ public class AnalysisInfoBuilder { usingSqlForPartitionColumn = info.usingSqlForPartitionColumn; tblUpdateTime = info.tblUpdateTime; emptyJob = info.emptyJob; + userInject = info.userInject; } public AnalysisInfoBuilder setJobId(long jobId) { @@ -268,12 +270,17 @@ public class AnalysisInfoBuilder { return this; } + public AnalysisInfoBuilder setUserInject(boolean userInject) { + this.userInject = userInject; + return this; + } + public AnalysisInfo build() { return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, colToPartitions, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount, - cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob); + cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 8db50e667f6..f56e800e83d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -560,6 +560,17 @@ public class AnalysisManager implements Writable { } } + @VisibleForTesting + public void updateTableStatsForAlterStats(AnalysisInfo jobInfo, TableIf tbl) { + TableStatsMeta tableStats = findTableStatsStatus(tbl.getId()); + if (tableStats == null) { + updateTableStatsStatus(new TableStatsMeta(0, jobInfo, tbl)); + } else { + tableStats.update(jobInfo, tbl); + logCreateTableStats(tableStats); + } + } + public List<AnalysisInfo> showAnalysisJob(ShowAnalyzeStmt stmt) { return findShowAnalyzeResult(analysisJobInfoMap.values(), stmt); } @@ -654,6 +665,7 @@ public class AnalysisManager implements Writable { } tableStats.updatedTime = 0; } + tableStats.userInjected = false; logCreateTableStats(tableStats); StatisticsRepository.dropStatistics(tblId, cols); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index cc6e7d88db4..f6cf79b0e5c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -158,6 +158,9 @@ public class StatisticsAutoCollector extends StatisticsCollector { if (tableStats == null || tableStats.newPartitionLoaded.get()) { return false; } + if (tableStats.userInjected) { + return true; + } return System.currentTimeMillis() - tableStats.updatedTime < StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index dc4f89c1e4d..6e88e46beb1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -303,6 +303,14 @@ public class StatisticsRepository { StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params); Env.getCurrentEnv().getStatisticsCache() .updateColStatsCache(objects.table.getId(), -1, colName, columnStatistic); + AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder() + .setTblUpdateTime(System.currentTimeMillis()) + .setColName("") + .setColToPartitions(Maps.newHashMap()) + .setUserInject(true) + .setJobType(AnalysisInfo.JobType.MANUAL) + .build(); + Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table); } else { // update partition granularity statistics for (Long partitionId : partitionIds) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 00878adcc44..926194a7258 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -72,6 +72,9 @@ public class TableStatsMeta implements Writable { @SerializedName("newPartitionLoaded") public AtomicBoolean newPartitionLoaded = new AtomicBoolean(false); + @SerializedName("userInjected") + public boolean userInjected; + @VisibleForTesting public TableStatsMeta() { tblId = 0; @@ -130,13 +133,15 @@ public class TableStatsMeta implements Writable { public void update(AnalysisInfo analyzedJob, TableIf tableIf) { updatedTime = analyzedJob.tblUpdateTime; + userInjected = analyzedJob.userInject; String colNameStr = analyzedJob.colName; // colName field AnalyzeJob's format likes: "[col1, col2]", we need to remove brackets here // TODO: Refactor this later if (analyzedJob.colName.startsWith("[") && analyzedJob.colName.endsWith("]")) { colNameStr = colNameStr.substring(1, colNameStr.length() - 1); } - List<String> cols = Arrays.stream(colNameStr.split(",")).map(String::trim).collect(Collectors.toList()); + List<String> cols = Arrays.stream(colNameStr.split(",")) + .map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); for (String col : cols) { ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(col); if (colStatsMeta == null) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index c4b2b08720f..6d95cc381b8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -321,6 +321,15 @@ public class StatisticsAutoCollectorTest { }; // can't find table stats meta, which means this table never get analyzed, so we shouldn't skip it this time Assertions.assertFalse(autoCollector.skip(olapTable)); + new MockUp<AnalysisManager>() { + + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return stats; + } + }; + stats.userInjected = true; + Assertions.assertTrue(autoCollector.skip(olapTable)); // this is not olap table nor external table, so we should skip it this time Assertions.assertTrue(autoCollector.skip(anyOtherTable)); } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 718f48bc72e..0c5479d6744 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -2654,6 +2654,35 @@ PARTITION `p599` VALUES IN (599) } sql """set forbid_unknown_col_stats=true""" + // Test alter + sql """ + CREATE TABLE alter_test( + `id` int NOT NULL, + `name` VARCHAR(25) NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ANALYZE TABLE alter_test WITH SYNC""" + def alter_result = sql """show table stats alter_test""" + assertEquals("false", alter_result[0][7]) + sql """alter table alter_test modify column id set stats ('row_count'='2.0E7', 'ndv'='3927659.0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" + alter_result = sql """show table stats alter_test""" + assertEquals("true", alter_result[0][7]) + sql """ANALYZE TABLE alter_test WITH SYNC""" + alter_result = sql """show table stats alter_test""" + assertEquals("false", alter_result[0][7]) + sql """alter table alter_test modify column id set stats ('row_count'='2.0E7', 'ndv'='3927659.0', 'num_nulls'='0.0', 'data_size'='2.69975443E8', 'min_value'='1', 'max_value'='2');""" + alter_result = sql """show table stats alter_test""" + assertEquals("true", alter_result[0][7]) + sql """drop stats alter_test""" + alter_result = sql """show table stats alter_test""" + assertEquals("false", alter_result[0][7]) + // Test trigger type. sql """DROP DATABASE IF EXISTS trigger""" sql """CREATE DATABASE IF NOT EXISTS trigger""" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org