This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1341e93c2d2 [improvement](statistics)Support auto analyze columns that haven't been analyzed for a long time. (#42399) 1341e93c2d2 is described below commit 1341e93c2d26ec7518cbbe234ad2ac721b08148f Author: James <lijib...@selectdb.com> AuthorDate: Wed Dec 11 15:09:09 2024 +0800 [improvement](statistics)Support auto analyze columns that haven't been analyzed for a long time. (#42399) Support auto analyzing columns that haven't been analyzed for a long time. Add a very low priority job queue for auto analyze to process such columns. The purpose of this change is to make sure all tables can be auto analyzed within a certain time. In earlier Doris versions, users often encountered this kind of issue: a user loads some new data into a large table every day, but the change rate (percentage of new data) is very low because the table already holds a large amount of old data. In this case, auto analyze will not be triggered for this table for a very long time, because the default trigger threshold of auto analyze is 40% (more than 40% of the data in the table has changed since the last analyze). This will probably cause a bad plan because the min/max/ndv statistics are outdated. 
--- .../main/java/org/apache/doris/common/Config.java | 2 +- .../doris/analysis/ShowAutoAnalyzeJobsStmt.java | 4 +- .../apache/doris/analysis/ShowColumnStatsStmt.java | 2 + .../apache/doris/analysis/ShowTableStatsStmt.java | 9 +- .../java/org/apache/doris/catalog/OlapTable.java | 11 -- .../org/apache/doris/statistics/AnalysisInfo.java | 9 +- .../doris/statistics/AnalysisInfoBuilder.java | 9 +- .../apache/doris/statistics/AnalysisManager.java | 7 +- .../org/apache/doris/statistics/ColStatsMeta.java | 9 +- .../org/apache/doris/statistics/JobPriority.java | 1 + .../doris/statistics/StatisticsAutoCollector.java | 40 +++-- .../doris/statistics/StatisticsJobAppender.java | 66 +++++--- .../doris/statistics/StatisticsRepository.java | 3 +- .../apache/doris/statistics/TableStatsMeta.java | 10 +- .../doris/statistics/util/StatisticsUtil.java | 52 +++++- .../statistics/StatisticsJobAppenderTest.java | 175 ++++++++++++++++----- .../doris/statistics/util/StatisticsUtilTest.java | 168 ++++++++++++++++---- .../suites/statistics/analyze_stats.groovy | 33 +++- 18 files changed, 472 insertions(+), 138 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 50d43bffa1b..c5c7023b20f 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2948,7 +2948,7 @@ public class Config extends ConfigBase { "Columns that have not been collected within the specified interval will trigger automatic analyze. " + "0 means not trigger." }) - public static long auto_analyze_interval_seconds = 0; + public static long auto_analyze_interval_seconds = 86400; // 24 hours. 
//========================================================================== // begin of cloud config diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java index 9b07796df78..7cff1e2b949 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java @@ -40,7 +40,7 @@ import com.google.common.collect.ImmutableList; * [TABLE] * [ * WHERE - * [PRIORITY = ["HIGH"|"MID"|"LOW"]] + * [PRIORITY = ["HIGH"|"MID"|"LOW"|"VERY_LOW"]] * ] */ public class ShowAutoAnalyzeJobsStmt extends ShowStmt implements NotFallbackInParser { @@ -175,7 +175,7 @@ public class ShowAutoAnalyzeJobsStmt extends ShowStmt implements NotFallbackInPa if (!valid) { throw new AnalysisException("Where clause should looks like: " - + "PRIORITY = \"HIGH|MID|LOW\""); + + "PRIORITY = \"HIGH|MID|LOW|VERY_LOW\""); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java index 354b57bc55c..d180ef0d807 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java @@ -70,6 +70,7 @@ public class ShowColumnStatsStmt extends ShowStmt implements NotFallbackInParser .add("updated_time") .add("update_rows") .add("last_analyze_row_count") + .add("last_analyze_version") .build(); private static final ImmutableList<String> PARTITION_COLUMN_TITLE_NAMES = @@ -185,6 +186,7 @@ public class ShowColumnStatsStmt extends ShowStmt implements NotFallbackInParser row.add(String.valueOf(p.second.updatedTime)); row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.updatedRows)); row.add(String.valueOf(colStatsMeta == null ? 
"N/A" : colStatsMeta.rowCount)); + row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.tableVersion)); result.add(row); }); return new ShowResultSet(getMetaData(), result); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index ccffee3086d..ea9b96d0afe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -62,6 +62,7 @@ public class ShowTableStatsStmt extends ShowStmt implements NotFallbackInParser .add("new_partition") .add("user_inject") .add("enable_auto_analyze") + .add("last_analyze_time") .build(); private static final ImmutableList<String> PARTITION_TITLE_NAMES = @@ -230,6 +231,7 @@ public class ShowTableStatsStmt extends ShowStmt implements NotFallbackInParser row.add(""); row.add(""); row.add(String.valueOf(table.autoAnalyzeEnabled())); + row.add(""); result.add(row); return new ShowResultSet(getMetaData(), result); } @@ -242,13 +244,16 @@ public class ShowTableStatsStmt extends ShowStmt implements NotFallbackInParser LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.updatedTime), java.time.ZoneId.systemDefault()); - String formattedDateTime = dateTime.format(formatter); - row.add(formattedDateTime); + LocalDateTime lastAnalyzeTime = + LocalDateTime.ofInstant(Instant.ofEpochMilli(tableStatistic.lastAnalyzeTime), + java.time.ZoneId.systemDefault()); + row.add(dateTime.format(formatter)); row.add(tableStatistic.analyzeColumns().toString()); row.add(tableStatistic.jobType.toString()); row.add(String.valueOf(tableStatistic.partitionChanged.get())); row.add(String.valueOf(tableStatistic.userInjected)); row.add(table == null ? 
"N/A" : String.valueOf(table.autoAnalyzeEnabled())); + row.add(lastAnalyzeTime.format(formatter)); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index c1d62a4be0d..f3d27908bdd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -969,17 +969,6 @@ public class OlapTable extends Table implements MTMVRelatedTableIf, GsonPostProc return columns; } - public List<Column> getMvColumns(boolean full) { - List<Column> columns = Lists.newArrayList(); - for (Long indexId : indexIdToMeta.keySet()) { - if (indexId == baseIndexId) { - continue; - } - columns.addAll(getSchemaByIndexId(indexId, full)); - } - return columns; - } - public List<Column> getBaseSchemaKeyColumns() { return getKeyColumnsByIndexId(baseIndexId); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 6ec413821ea..58b2c3e3d1f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -183,6 +183,9 @@ public class AnalysisInfo implements Writable { @SerializedName("updateRows") public final long updateRows; + @SerializedName("tv") + public final long tableVersion; + public final Map<Long, Long> partitionUpdateRows = new ConcurrentHashMap<>(); @SerializedName("tblUpdateTime") @@ -206,8 +209,8 @@ public class AnalysisInfo implements Writable { long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType, boolean partitionOnly, boolean samplingPartition, boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull, - boolean usingSqlForExternalTable, long tblUpdateTime, long 
rowCount, boolean userInject, - long updateRows, JobPriority priority, Map<Long, Long> partitionUpdateRows, boolean enablePartition) { + boolean usingSqlForExternalTable, long tblUpdateTime, long rowCount, boolean userInject, long updateRows, + long tableVersion, JobPriority priority, Map<Long, Long> partitionUpdateRows, boolean enablePartition) { this.jobId = jobId; this.taskId = taskId; this.taskIds = taskIds; @@ -244,6 +247,7 @@ public class AnalysisInfo implements Writable { this.rowCount = rowCount; this.userInject = userInject; this.updateRows = updateRows; + this.tableVersion = tableVersion; this.priority = priority; if (partitionUpdateRows != null) { this.partitionUpdateRows.putAll(partitionUpdateRows); @@ -292,6 +296,7 @@ public class AnalysisInfo implements Writable { sj.add("rowCount: " + rowCount); sj.add("userInject: " + userInject); sj.add("updateRows: " + updateRows); + sj.add("tableVersion: " + tableVersion); sj.add("priority: " + priority.name()); sj.add("enablePartition: " + enablePartition); return sj.toString(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 73817363ef1..bbd0d616495 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -63,6 +63,7 @@ public class AnalysisInfoBuilder { private long rowCount; private boolean userInject = false; private long updateRows; + private long tableVersion; private JobPriority priority; private Map<Long, Long> partitionUpdateRows; private boolean enablePartition; @@ -104,6 +105,7 @@ public class AnalysisInfoBuilder { rowCount = info.rowCount; userInject = info.userInject; updateRows = info.updateRows; + tableVersion = info.tableVersion; priority = info.priority; partitionUpdateRows = info.partitionUpdateRows; enablePartition = info.enablePartition; @@ -274,6 
+276,11 @@ public class AnalysisInfoBuilder { return this; } + public AnalysisInfoBuilder setTableVersion(long tableVersion) { + this.tableVersion = tableVersion; + return this; + } + public AnalysisInfoBuilder setPriority(JobPriority priority) { this.priority = priority; return this; @@ -295,7 +302,7 @@ public class AnalysisInfoBuilder { sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, partitionOnly, samplingPartition, isAllPartition, partitionCount, cronExpression, forceFull, usingSqlForExternalTable, tblUpdateTime, rowCount, userInject, updateRows, - priority, partitionUpdateRows, enablePartition); + tableVersion, priority, partitionUpdateRows, enablePartition); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index c2b20707f13..ece9daf2520 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -129,6 +129,7 @@ public class AnalysisManager implements Writable { public final Map<TableName, Set<Pair<String, String>>> highPriorityJobs = new LinkedHashMap<>(); public final Map<TableName, Set<Pair<String, String>>> midPriorityJobs = new LinkedHashMap<>(); public final Map<TableName, Set<Pair<String, String>>> lowPriorityJobs = new LinkedHashMap<>(); + public final Map<TableName, Set<Pair<String, String>>> veryLowPriorityJobs = new LinkedHashMap<>(); // Tracking running manually submitted async tasks, keep in mem only protected final ConcurrentMap<Long, Map<Long, BaseAnalysisTask>> analysisJobIdToTaskMap = new ConcurrentHashMap<>(); @@ -381,7 +382,7 @@ public class AnalysisManager implements Writable { } infoBuilder.setColName(stringJoiner.toString()); infoBuilder.setTaskIds(Lists.newArrayList()); - infoBuilder.setTblUpdateTime(System.currentTimeMillis()); + 
infoBuilder.setTblUpdateTime(table.getUpdateTime()); // Empty table row count is 0. Call fetchRowCount() when getRowCount() returns <= 0, // because getRowCount may return <= 0 if cached is not loaded. This is mainly for external table. long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : @@ -389,6 +390,7 @@ public class AnalysisManager implements Writable { infoBuilder.setRowCount(rowCount); TableStatsMeta tableStatsStatus = findTableStatsStatus(table.getId()); infoBuilder.setUpdateRows(tableStatsStatus == null ? 0 : tableStatsStatus.updatedRows.get()); + infoBuilder.setTableVersion(table instanceof OlapTable ? ((OlapTable) table).getVisibleVersion() : 0); infoBuilder.setPriority(JobPriority.MANUAL); infoBuilder.setPartitionUpdateRows(tableStatsStatus == null ? null : tableStatsStatus.partitionUpdateRows); infoBuilder.setEnablePartition(StatisticsUtil.enablePartitionAnalyze()); @@ -547,12 +549,15 @@ public class AnalysisManager implements Writable { result.addAll(getPendingJobs(highPriorityJobs, JobPriority.HIGH, tblName)); result.addAll(getPendingJobs(midPriorityJobs, JobPriority.MID, tblName)); result.addAll(getPendingJobs(lowPriorityJobs, JobPriority.LOW, tblName)); + result.addAll(getPendingJobs(veryLowPriorityJobs, JobPriority.VERY_LOW, tblName)); } else if (priority.equals(JobPriority.HIGH.name())) { result.addAll(getPendingJobs(highPriorityJobs, JobPriority.HIGH, tblName)); } else if (priority.equals(JobPriority.MID.name())) { result.addAll(getPendingJobs(midPriorityJobs, JobPriority.MID, tblName)); } else if (priority.equals(JobPriority.LOW.name())) { result.addAll(getPendingJobs(lowPriorityJobs, JobPriority.LOW, tblName)); + } else if (priority.equals(JobPriority.VERY_LOW.name())) { + result.addAll(getPendingJobs(veryLowPriorityJobs, JobPriority.VERY_LOW, tblName)); } return result; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java index 6cb2ced9286..78f51c2ac0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java @@ -52,11 +52,15 @@ public class ColStatsMeta { @SerializedName("rowCount") public long rowCount; + @SerializedName("tv") + public long tableVersion; + @SerializedName("pur") public ConcurrentMap<Long, Long> partitionUpdateRows = new ConcurrentHashMap<>(); - public ColStatsMeta(long updatedTime, AnalysisMethod analysisMethod, AnalysisType analysisType, JobType jobType, - long queriedTimes, long rowCount, long updatedRows, Map<Long, Long> partitionUpdateRows) { + public ColStatsMeta(long updatedTime, AnalysisMethod analysisMethod, AnalysisType analysisType, + JobType jobType, long queriedTimes, long rowCount, long updatedRows, + long tableVersion, Map<Long, Long> partitionUpdateRows) { this.updatedTime = updatedTime; this.analysisMethod = analysisMethod; this.analysisType = analysisType; @@ -64,6 +68,7 @@ public class ColStatsMeta { this.queriedTimes.addAndGet(queriedTimes); this.updatedRows = updatedRows; this.rowCount = rowCount; + this.tableVersion = tableVersion; if (partitionUpdateRows != null) { this.partitionUpdateRows.putAll(partitionUpdateRows); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java index c3656b92927..df95b3cbede 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java @@ -21,5 +21,6 @@ public enum JobPriority { HIGH, MID, LOW, + VERY_LOW, MANUAL; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 38af43bef73..6c1fd1ba959 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -26,7 +26,6 @@ import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.JobType; @@ -37,7 +36,6 @@ import org.apache.hudi.common.util.VisibleForTesting; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.time.LocalTime; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -92,10 +90,11 @@ public class StatisticsAutoCollector extends MasterDaemon { } protected void collect() { - while (canCollect()) { + while (StatisticsUtil.canCollect()) { Pair<Entry<TableName, Set<Pair<String, String>>>, JobPriority> job = getJob(); if (job == null) { // No more job to process, break and sleep. 
+ LOG.info("No auto analyze jobs to process."); break; } try { @@ -112,11 +111,6 @@ public class StatisticsAutoCollector extends MasterDaemon { } } - protected boolean canCollect() { - return StatisticsUtil.enableAutoAnalyze() - && StatisticsUtil.inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId())); - } - protected Pair<Entry<TableName, Set<Pair<String, String>>>, JobPriority> getJob() { AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); Optional<Entry<TableName, Set<Pair<String, String>>>> job = fetchJobFromMap(manager.highPriorityJobs); @@ -128,7 +122,11 @@ public class StatisticsAutoCollector extends MasterDaemon { return Pair.of(job.get(), JobPriority.MID); } job = fetchJobFromMap(manager.lowPriorityJobs); - return job.map(entry -> Pair.of(entry, JobPriority.LOW)).orElse(null); + if (job.isPresent()) { + return Pair.of(job.get(), JobPriority.LOW); + } + job = fetchJobFromMap(manager.veryLowPriorityJobs); + return job.map(tableNameSetEntry -> Pair.of(tableNameSetEntry, JobPriority.VERY_LOW)).orElse(null); } protected Optional<Map.Entry<TableName, Set<Pair<String, String>>>> fetchJobFromMap( @@ -142,9 +140,13 @@ public class StatisticsAutoCollector extends MasterDaemon { protected void processOneJob(TableIf table, Set<Pair<String, String>> columns, JobPriority priority) throws DdlException { - // appendMvColumn(table, columns); appendAllColumns(table, columns); - columns = columns.stream().filter(c -> StatisticsUtil.needAnalyzeColumn(table, c)).collect(Collectors.toSet()); + columns = columns.stream().filter( + c -> StatisticsUtil.needAnalyzeColumn(table, c) || StatisticsUtil.isLongTimeColumn(table, c)) + .collect(Collectors.toSet()); + if (columns.isEmpty()) { + return; + } AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority); if (analyzeJob == null) { return; @@ -178,15 +180,6 @@ public class StatisticsAutoCollector extends MasterDaemon { } } - protected void appendMvColumn(TableIf table, Set<String> columns) { 
- if (!(table instanceof OlapTable)) { - return; - } - OlapTable olapTable = (OlapTable) table; - Set<String> mvColumns = olapTable.getMvColumns(false).stream().map(Column::getName).collect(Collectors.toSet()); - columns.addAll(mvColumns); - } - protected boolean supportAutoAnalyze(TableIf tableIf) { if (tableIf == null) { return false; @@ -248,9 +241,10 @@ public class StatisticsAutoCollector extends MasterDaemon { .setTaskIds(new ArrayList<>()) .setLastExecTimeInMs(System.currentTimeMillis()) .setJobType(JobType.SYSTEM) - .setTblUpdateTime(System.currentTimeMillis()) + .setTblUpdateTime(table.getUpdateTime()) .setRowCount(rowCount) .setUpdateRows(tableStatsStatus == null ? 0 : tableStatsStatus.updatedRows.get()) + .setTableVersion(table instanceof OlapTable ? ((OlapTable) table).getVisibleVersion() : 0) .setPriority(priority) .setPartitionUpdateRows(tableStatsStatus == null ? null : tableStatsStatus.partitionUpdateRows) .setEnablePartition(StatisticsUtil.enablePartitionAnalyze()) @@ -275,4 +269,8 @@ public class StatisticsAutoCollector extends MasterDaemon { future.get(); } } + + public boolean isReady() { + return waited; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java index b67d1cf947c..4a3e93550fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java @@ -29,6 +29,7 @@ import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.statistics.util.StatisticsUtil; +import com.google.common.annotations.VisibleForTesting; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -46,7 +47,7 @@ public class StatisticsJobAppender extends MasterDaemon { private static final Logger LOG = 
LogManager.getLogger(StatisticsJobAppender.class); public static final long INTERVAL = 1000; - public static final int JOB_MAP_SIZE = 1000; + public static final int JOB_MAP_SIZE = 100; public static final int TABLE_BATCH_SIZE = 100; private long currentDbId = 0; @@ -70,6 +71,11 @@ public class StatisticsJobAppender extends MasterDaemon { LOG.info("Stats table not available, skip"); return; } + if (Env.getCurrentEnv().getStatisticsAutoCollector() == null + || !Env.getCurrentEnv().getStatisticsAutoCollector().isReady()) { + LOG.info("Statistics auto collector not ready, skip"); + return; + } if (Env.isCheckpointThread()) { return; } @@ -81,7 +87,7 @@ public class StatisticsJobAppender extends MasterDaemon { appendColumnsToJobs(manager.highPriorityColumns, manager.highPriorityJobs); appendColumnsToJobs(manager.midPriorityColumns, manager.midPriorityJobs); if (StatisticsUtil.enableAutoAnalyzeInternalCatalog()) { - appendToLowJobs(manager.lowPriorityJobs); + appendToLowJobs(manager.lowPriorityJobs, manager.veryLowPriorityJobs); } } @@ -136,7 +142,8 @@ public class StatisticsJobAppender extends MasterDaemon { } } - protected void appendToLowJobs(Map<TableName, Set<Pair<String, String>>> jobs) { + protected void appendToLowJobs(Map<TableName, Set<Pair<String, String>>> lowPriorityJobs, + Map<TableName, Set<Pair<String, String>>> veryLowPriorityJobs) { if (System.currentTimeMillis() - lastRoundFinishTime < lowJobIntervalMs) { return; } @@ -162,27 +169,33 @@ public class StatisticsJobAppender extends MasterDaemon { if (t.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) { continue; } - Set<Pair<String, String>> columnIndexPairs = t.getColumnIndexPairs( - t.getSchemaAllIndexes(false).stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName).collect(Collectors.toSet())) + Set<String> columns = t.getSchemaAllIndexes(false).stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + 
.map(Column::getName).collect(Collectors.toSet()); + Set<Pair<String, String>> columnIndexPairs = t.getColumnIndexPairs(columns) .stream().filter(p -> StatisticsUtil.needAnalyzeColumn(t, p)) .collect(Collectors.toSet()); - if (columnIndexPairs.isEmpty()) { - continue; - } TableName tableName = new TableName(t.getDatabase().getCatalog().getName(), t.getDatabase().getFullName(), t.getName()); - synchronized (jobs) { - // If job map reach the upper limit, stop adding new jobs. - if (!jobs.containsKey(tableName) && jobs.size() >= JOB_MAP_SIZE) { - LOG.info("Low job map full."); + // Append to low job map first. + if (!columnIndexPairs.isEmpty()) { + boolean appended = doAppend(lowPriorityJobs, columnIndexPairs, tableName); + // If low job map is full, stop this iteration. + if (!appended) { + LOG.debug("Low Priority job map is full."); return; } - if (jobs.containsKey(tableName)) { - jobs.get(tableName).addAll(columnIndexPairs); - } else { - jobs.put(tableName, columnIndexPairs); + } else { + // Append to very low job map. + columnIndexPairs = t.getColumnIndexPairs(columns) + .stream().filter(p -> StatisticsUtil.isLongTimeColumn(t, p)) + .collect(Collectors.toSet()); + if (!columnIndexPairs.isEmpty()) { + boolean appended = doAppend(veryLowPriorityJobs, columnIndexPairs, tableName); + // If very low job map is full, simply ignore it and go to the next table. 
+ if (!appended) { + LOG.debug("Very low Priority job map is full."); + } } } currentTableId = t.getId(); @@ -200,6 +213,23 @@ public class StatisticsJobAppender extends MasterDaemon { lastRoundFinishTime = System.currentTimeMillis(); } + @VisibleForTesting + public boolean doAppend(Map<TableName, Set<Pair<String, String>>> jobMap, + Set<Pair<String, String>> columnIndexPairs, + TableName tableName) { + synchronized (jobMap) { + if (!jobMap.containsKey(tableName) && jobMap.size() >= JOB_MAP_SIZE) { + return false; + } + if (jobMap.containsKey(tableName)) { + jobMap.get(tableName).addAll(columnIndexPairs); + } else { + jobMap.put(tableName, columnIndexPairs); + } + } + return true; + } + // For unit test only. public void setLastRoundFinishTime(long value) { lastRoundFinishTime = value; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index ba23ab84dc7..ac4704b54c6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -379,9 +379,8 @@ public class StatisticsRepository { objects.catalog.getId(), objects.db.getId(), objects.table.getId(), indexId, colName, null, columnStatistic); Env.getCurrentEnv().getStatisticsCache().syncColStats(data); - long timestamp = System.currentTimeMillis(); AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder() - .setTblUpdateTime(timestamp) + .setTblUpdateTime(objects.table.getUpdateTime()) .setColName("") .setRowCount((long) Double.parseDouble(rowCount)) .setJobColumns(Sets.newHashSet()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 6a7f2933996..4ebdb019f27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -80,6 +80,9 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { @SerializedName("updateTime") public long updatedTime; + @SerializedName("lat") + public long lastAnalyzeTime; + @SerializedName("colNameToColStatsMeta") private ConcurrentMap<String, ColStatsMeta> deprecatedColNameToColStatsMeta = new ConcurrentHashMap<>(); @@ -160,6 +163,7 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { public void update(AnalysisInfo analyzedJob, TableIf tableIf) { updatedTime = analyzedJob.tblUpdateTime; + lastAnalyzeTime = analyzedJob.createTime; if (analyzedJob.userInject) { userInjected = true; } @@ -168,14 +172,16 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { if (colStatsMeta == null) { colToColStatsMeta.put(colPair, new ColStatsMeta(analyzedJob.createTime, analyzedJob.analysisMethod, analyzedJob.analysisType, analyzedJob.jobType, 0, analyzedJob.rowCount, - analyzedJob.updateRows, analyzedJob.enablePartition ? analyzedJob.partitionUpdateRows : null)); + analyzedJob.updateRows, analyzedJob.tableVersion, + analyzedJob.enablePartition ? 
analyzedJob.partitionUpdateRows : null)); } else { - colStatsMeta.updatedTime = analyzedJob.tblUpdateTime; + colStatsMeta.updatedTime = analyzedJob.createTime; colStatsMeta.analysisType = analyzedJob.analysisType; colStatsMeta.analysisMethod = analyzedJob.analysisMethod; colStatsMeta.jobType = analyzedJob.jobType; colStatsMeta.updatedRows = analyzedJob.updateRows; colStatsMeta.rowCount = analyzedJob.rowCount; + colStatsMeta.tableVersion = analyzedJob.tableVersion; if (analyzedJob.enablePartition) { if (colStatsMeta.partitionUpdateRows == null) { colStatsMeta.partitionUpdateRows = new ConcurrentHashMap<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index a9c1612eb48..dd037617ba6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -53,6 +53,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; +import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.InternalCatalog; @@ -1012,12 +1013,6 @@ public class StatisticsUtil { if (columnStatsMeta == null) { return true; } - // Column hasn't been analyzed for longer than config interval. - if (Config.auto_analyze_interval_seconds > 0 - && System.currentTimeMillis() - columnStatsMeta.updatedTime - > Config.auto_analyze_interval_seconds * 1000) { - return true; - } // Partition table partition stats never been collected. 
if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable() && columnStatsMeta.partitionUpdateRows == null) { @@ -1072,7 +1067,7 @@ public class StatisticsUtil { } // External is hard to calculate change rate, use time interval to control analyze frequency. return System.currentTimeMillis() - - tableStatsStatus.updatedTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); + - tableStatsStatus.lastAnalyzeTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); } } @@ -1127,4 +1122,47 @@ public class StatisticsUtil { } return false; } + + // This function return true means the column hasn't been analyzed for longer than the configured time. + public static boolean isLongTimeColumn(TableIf table, Pair<String, String> column) { + if (column == null) { + return false; + } + if (!table.autoAnalyzeEnabled()) { + return false; + } + if (!(table instanceof OlapTable)) { + return false; + } + AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tblStats = manager.findTableStatsStatus(table.getId()); + // Table never been analyzed, skip it for higher priority jobs. + if (tblStats == null) { + LOG.warn("Table stats is null."); + return false; + } + ColStatsMeta columnStats = tblStats.findColumnStatsMeta(column.first, column.second); + if (columnStats == null) { + // Column never been analyzed, skip it for higher priority jobs. + return false; + } + // User injected column stats, don't do auto analyze, avoid overwrite user injected stats. + if (tblStats.userInjected) { + return false; + } + boolean isLongTime = Config.auto_analyze_interval_seconds > 0 + && System.currentTimeMillis() - columnStats.updatedTime > Config.auto_analyze_interval_seconds * 1000; + if (!isLongTime) { + return false; + } + // For olap table, if the table visible version and row count doesn't change since last analyze, + // we don't need to analyze it because its data is not changed. 
+ OlapTable olapTable = (OlapTable) table; + return olapTable.getVisibleVersion() != columnStats.tableVersion + || olapTable.getRowCount() != columnStats.rowCount; + } + + public static boolean canCollect() { + return enableAutoAnalyze() && inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId())); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java index e3255ab23a0..5b890795f01 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java @@ -33,6 +33,8 @@ import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import mockit.Mock; import mockit.MockUp; import org.junit.jupiter.api.Assertions; @@ -178,13 +180,21 @@ public class StatisticsJobAppenderTest { } }; - Map<TableName, Set<Pair<String, String>>> testMap = new HashMap<>(); + new MockUp<StatisticsUtil>() { + @Mock + public boolean needAnalyzeColumn(TableIf table, Pair<String, String> column) { + return true; + } + }; + + Map<TableName, Set<Pair<String, String>>> testLowMap = new HashMap<>(); + Map<TableName, Set<Pair<String, String>>> testVeryLowMap = new HashMap<>(); StatisticsJobAppender appender = new StatisticsJobAppender(); - appender.appendToLowJobs(testMap); - Assertions.assertEquals(100, testMap.size()); - testMap.clear(); - appender.appendToLowJobs(testMap); - Assertions.assertEquals(40, testMap.size()); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(100, testLowMap.size()); + testLowMap.clear(); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(40, testLowMap.size()); for (int i = 0; i < 
StatisticsJobAppender.JOB_MAP_SIZE; i++) { Database db = new Database(id++, "testDb" + i); @@ -198,38 +208,93 @@ public class StatisticsJobAppenderTest { db.createTableWithLock(table2, true, false); } - testMap.clear(); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); + testLowMap.clear(); appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); + appender.appendToLowJobs(testLowMap, testVeryLowMap); appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); + appender.appendToLowJobs(testLowMap, testVeryLowMap); appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testLowMap.size()); + } + + @Test + public void testAppendQueryColumnToVeryLowJobMap() throws DdlException { + InternalCatalog testCatalog = new InternalCatalog(); + int id = 10; + for (int i = 0; i < 70; i++) { + Database db = new Database(id++, "testDb" + i); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + 
db.createTableWithLock(table1, true, false); + db.createTableWithLock(table2, true, false); + } + + new MockUp<Env>() { + @Mock + public InternalCatalog getCurrentInternalCatalog() { + return testCatalog; + } + }; + + new MockUp<OlapTable>() { + @Mock + public List<Column> getBaseSchema() { + return Lists.newArrayList(); + } + + @Mock + public Set<Pair<String, String>> getColumnIndexPairs(Set<String> columns) { + return Collections.singleton(Pair.of("mockIndex", "mockColumn")); + } + }; + + new MockUp<StatisticsUtil>() { + @Mock + public boolean needAnalyzeColumn(TableIf table, Pair<String, String> column) { + return false; + } + + @Mock + public boolean isLongTimeColumn(TableIf table, Pair<String, String> column) { + return true; + } + }; + + Map<TableName, Set<Pair<String, String>>> testLowMap = new HashMap<>(); + Map<TableName, Set<Pair<String, String>>> testVeryLowMap = new HashMap<>(); + StatisticsJobAppender appender = new StatisticsJobAppender(); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(100, testVeryLowMap.size()); + testVeryLowMap.clear(); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(40, testVeryLowMap.size()); + + for (int i = 0; i < StatisticsJobAppender.JOB_MAP_SIZE; i++) { + Database db = new Database(id++, "testDb" + i); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + db.createTableWithLock(table1, true, false); + db.createTableWithLock(table2, true, false); + } + + testLowMap.clear(); appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); + appender.appendToLowJobs(testLowMap, testVeryLowMap); appender.setLastRoundFinishTime(0); - 
appender.appendToLowJobs(testMap); + appender.appendToLowJobs(testLowMap, testVeryLowMap); appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testMap.size()); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(0, testLowMap.size()); + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testVeryLowMap.size()); } @Test @@ -270,12 +335,48 @@ public class StatisticsJobAppenderTest { return thresholds[count++]; } }; - Map<TableName, Set<Pair<String, String>>> testMap = new HashMap<>(); + Map<TableName, Set<Pair<String, String>>> testLowMap = new HashMap<>(); + Map<TableName, Set<Pair<String, String>>> testVeryLowMap = new HashMap<>(); StatisticsJobAppender appender = new StatisticsJobAppender(); - appender.appendToLowJobs(testMap); - Assertions.assertEquals(0, testMap.size()); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(0, testLowMap.size()); appender.setLastRoundFinishTime(0); - appender.appendToLowJobs(testMap); - Assertions.assertEquals(1, testMap.size()); + appender.appendToLowJobs(testLowMap, testVeryLowMap); + Assertions.assertEquals(1, testLowMap.size()); + } + + @Test + public void testDoAppend() { + Map<TableName, Set<Pair<String, String>>> jobMap = Maps.newHashMap(); + Set<Pair<String, String>> columnIndexPairs1 = Sets.newHashSet(); + Set<Pair<String, String>> columnIndexPairs2 = Sets.newHashSet(); + TableName tableName1 = new TableName("catalog1", "db1", "table1"); + TableName tableName2 = new TableName("catalog2", "db2", "table2"); + Pair<String, String> pair1 = Pair.of("index1", "col1"); + columnIndexPairs1.add(pair1); + + StatisticsJobAppender appender = new StatisticsJobAppender(); + Assertions.assertTrue(appender.doAppend(jobMap, columnIndexPairs1, tableName1)); + Assertions.assertEquals(1, jobMap.size()); + Assertions.assertTrue(jobMap.containsKey(tableName1)); + Assertions.assertEquals(1, 
jobMap.get(tableName1).size()); + Assertions.assertTrue(jobMap.get(tableName1).contains(pair1)); + + Pair<String, String> pair2 = Pair.of("index2", "col2"); + columnIndexPairs1.add(pair2); + Assertions.assertTrue(appender.doAppend(jobMap, columnIndexPairs1, tableName1)); + Assertions.assertEquals(1, jobMap.size()); + Assertions.assertTrue(jobMap.containsKey(tableName1)); + Assertions.assertEquals(2, jobMap.get(tableName1).size()); + Assertions.assertTrue(jobMap.get(tableName1).contains(pair1)); + Assertions.assertTrue(jobMap.get(tableName1).contains(pair2)); + + Pair<String, String> pair3 = Pair.of("index3", "col3"); + columnIndexPairs2.add(pair3); + Assertions.assertTrue(appender.doAppend(jobMap, columnIndexPairs2, tableName2)); + Assertions.assertEquals(2, jobMap.size()); + Assertions.assertTrue(jobMap.containsKey(tableName2)); + Assertions.assertEquals(1, jobMap.get(tableName2).size()); + Assertions.assertTrue(jobMap.get(tableName2).contains(pair3)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index fbac718e421..ef1e9ca0297 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -32,6 +32,7 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.hive.HMSExternalCatalog; import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; +import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.datasource.jdbc.JdbcExternalCatalog; import org.apache.doris.datasource.jdbc.JdbcExternalTable; import org.apache.doris.qe.SessionVariable; @@ -208,12 +209,6 @@ class StatisticsUtilTest { Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); // 
Test external table auto analyze enabled. - new MockUp<AnalysisManager>() { - @Mock - public TableStatsMeta findTableStatsStatus(long tblId) { - return null; - } - }; externalCatalog.getCatalogProperty().addProperty(ExternalCatalog.ENABLE_AUTO_ANALYZE, "false"); HMSExternalTable hmsTable1 = new HMSExternalTable(1, "name", "dbName", externalCatalog); externalCatalog.setAutoAnalyzePolicy("dbName", "name", "enable"); @@ -238,27 +233,10 @@ class StatisticsUtilTest { tableMeta.userInjected = false; Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); - // Test column hasn't been analyzed for longer than 1 day. new MockUp<TableStatsMeta>() { @Mock public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { - return new ColStatsMeta(0, null, null, null, 0, 100, 0, null); - } - }; - new MockUp<OlapTable>() { - @Mock - public long getRowCount() { - return 100; - } - }; - Config.auto_analyze_interval_seconds = 60 * 60 * 24; - Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); - Config.auto_analyze_interval_seconds = 0; - - new MockUp<TableStatsMeta>() { - @Mock - public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { - return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 0, 0, null); + return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 0, 0, 0, null); } }; @@ -312,7 +290,7 @@ class StatisticsUtilTest { new MockUp<TableStatsMeta>() { @Mock public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { - return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 100, 0, null); + return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 100, 0, 0, null); } }; tableMeta.partitionChanged.set(false); @@ -322,7 +300,7 @@ class StatisticsUtilTest { new MockUp<TableStatsMeta>() { @Mock public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { - return 
new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 0, 0, null); + return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 0, 0, 0, null); } }; tableMeta.partitionChanged.set(false); @@ -338,7 +316,7 @@ class StatisticsUtilTest { new MockUp<TableStatsMeta>() { @Mock public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { - return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 500, 0, null); + return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 500, 0, 0, null); } }; tableMeta.partitionChanged.set(false); @@ -354,7 +332,7 @@ class StatisticsUtilTest { new MockUp<TableStatsMeta>() { @Mock public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { - return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 100, 80, null); + return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 100, 80, 0, null); } }; tableMeta.partitionChanged.set(false); @@ -382,6 +360,140 @@ class StatisticsUtilTest { tableMeta.partitionChanged.set(false); tableMeta.updatedRows.set(85); Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + } + + @Test + void testLongTimeNoAnalyze() { + Column column = new Column("testColumn", PrimitiveType.INT); + List<Column> schema = new ArrayList<>(); + schema.add(column); + OlapTable table = new OlapTable(200, "testTable", schema, null, null, null); + // Test column is null + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, null)); + + // Test table auto analyze is disabled. 
+ new MockUp<OlapTable>() { + @Mock + public boolean autoAnalyzeEnabled() { + return false; + } + }; + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + new MockUp<OlapTable>() { + @Mock + public boolean autoAnalyzeEnabled() { + return true; + } + }; + + // Test external table + new MockUp<ExternalTable>() { + @Mock + public boolean autoAnalyzeEnabled() { + return true; + } + }; + IcebergExternalTable icebergTable = new IcebergExternalTable(0, "", "", null); + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(icebergTable, Pair.of("index", column.getName()))); + + // Test table stats meta is null. + new MockUp<AnalysisManager>() { + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return null; + } + }; + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + + // Test column stats meta is null + TableStatsMeta tableMeta = new TableStatsMeta(); + new MockUp<AnalysisManager>() { + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return tableMeta; + } + }; + new MockUp<TableStatsMeta>() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return null; + } + }; + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + new MockUp<TableStatsMeta>() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 100, 0, 0, null); + } + }; + + // Test table stats is user injected + tableMeta.userInjected = true; + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + tableMeta.userInjected = false; + + // Test Config.auto_analyze_interval_seconds == 0 + Config.auto_analyze_interval_seconds = 0; + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + + // Test 
column analyzed within the time interval + Config.auto_analyze_interval_seconds = 86400; + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + + // Test column hasn't analyzed for longer than time interval, but version and row count doesn't change + new MockUp<TableStatsMeta>() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + ColStatsMeta ret = new ColStatsMeta(System.currentTimeMillis(), null, null, null, 0, 100, 20, 10, null); + try { + Thread.sleep(1500); + } catch (InterruptedException e) { + e.printStackTrace(); + } + return ret; + } + }; + new MockUp<OlapTable>() { + @Mock + public long getVisibleVersion() { + return 10; + } + + @Mock + public long fetchRowCount() { + return 100; + } + }; + Config.auto_analyze_interval_seconds = 1; + Assertions.assertFalse(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + + // Test column hasn't analyzed for longer than time interval, and version change + new MockUp<OlapTable>() { + @Mock + public long getVisibleVersion() { + return 11; + } + + @Mock + public long fetchRowCount() { + return 100; + } + }; + Assertions.assertTrue(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); + + // Test column hasn't analyzed for longer than time interval, and row count change + new MockUp<OlapTable>() { + @Mock + public long getVisibleVersion() { + return 10; + } + + @Mock + public long fetchRowCount() { + return 101; + } + }; + Assertions.assertTrue(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); } } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 55074e995fe..69360da6911 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -2903,7 +2903,38 @@ PARTITION `p599` VALUES IN (599) assertEquals("521779.0", 
alter_result[0][5]) assertEquals("7.142863009760572", alter_result[0][6]) - sql """DROP DATABASE IF EXISTS trigger""" + + // Test show last analyze table version + sql """create database if not exists test_version""" + sql """use test_version""" + sql """drop table if exists region""" + sql """ + CREATE TABLE region ( + r_regionkey int NOT NULL, + r_name VARCHAR(25) NOT NULL, + r_comment VARCHAR(152) + )ENGINE=OLAP + DUPLICATE KEY(`r_regionkey`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """analyze table region with sync""" + def versionResult = sql """show column stats region""" + assertEquals(versionResult[0][16], "1") + assertEquals(versionResult[1][16], "1") + assertEquals(versionResult[2][16], "1") + + sql """insert into region values (1, "1", "1")""" + sql """analyze table region with sync""" + versionResult = sql """show column stats region""" + assertEquals(versionResult[0][16], "2") + assertEquals(versionResult[1][16], "2") + assertEquals(versionResult[2][16], "2") + + sql """drop database if exists test_version""" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org