This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 971b35c6c2 [Improvement](statistics)Improve show analyze performance. (#22484) (#22882) 971b35c6c2 is described below commit 971b35c6c27831fa074d2fe9470da72fcbeae473 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Fri Aug 11 17:50:18 2023 +0800 [Improvement](statistics)Improve show analyze performance. (#22484) (#22882) --- .../java/org/apache/doris/statistics/AnalysisInfo.java | 17 ++++++++++++++++- .../apache/doris/statistics/AnalysisInfoBuilder.java | 11 ++++++++++- .../org/apache/doris/statistics/AnalysisManager.java | 18 +++++++++++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 592fb4a99b..ee39582aac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -37,6 +37,7 @@ import java.io.IOException; import java.lang.reflect.Type; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.StringJoiner; @@ -77,9 +78,14 @@ public class AnalysisInfo implements Writable { @SerializedName("jobId") public final long jobId; + // When this AnalysisInfo represent a task, this is the task id for it. @SerializedName("taskId") public final long taskId; + // When this AnalysisInfo represent a job, this is the list of task ids belong to this job. + @SerializedName("taskIds") + public final List<Long> taskIds; + @SerializedName("catalogName") public final String catalogName; @@ -153,7 +159,11 @@ public class AnalysisInfo implements Writable { @SerializedName("samplingPartition") public boolean samplingPartition; - public AnalysisInfo(long jobId, long taskId, String catalogName, String dbName, String tblName, + // For serialize + @SerializedName("cronExpr") + public String cronExprStr; + + public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, String catalogName, String dbName, String tblName, Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, int samplePercent, int sampleRows, int maxBucketNum, long periodTimeInMs, String message, @@ -161,6 +171,7 @@ public class AnalysisInfo implements Writable { boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition) { this.jobId = jobId; this.taskId = taskId; + this.taskIds = taskIds; this.catalogName = catalogName; this.dbName = dbName; this.tblName = tblName; @@ -231,6 +242,10 @@ public class AnalysisInfo implements Writable { return taskId == -1; } + public void addTaskId(long taskId) { + taskIds.add(taskId); + } + // TODO: use thrift public static AnalysisInfo fromResultRow(ResultRow resultRow) { try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 7d8b99502c..2fd0e25d72 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -23,12 +23,14 @@ import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.AnalysisInfo.ScheduleType; +import java.util.List; import java.util.Map; import java.util.Set; public class AnalysisInfoBuilder { private long jobId; private long taskId; + private List<Long> taskIds; private String catalogName; private String dbName; private String tblName; @@ -59,6 +61,7 @@ public class AnalysisInfoBuilder { public AnalysisInfoBuilder(AnalysisInfo info) { jobId = info.jobId; taskId = info.taskId; + taskIds = info.taskIds; catalogName = info.catalogName; dbName = info.dbName; tblName = info.tblName; @@ -94,6 +97,11 @@ public class AnalysisInfoBuilder { return this; } + public AnalysisInfoBuilder setTaskIds(List<Long> taskIds) { + this.taskIds = taskIds; + return this; + } + public AnalysisInfoBuilder setCatalogName(String catalogName) { this.catalogName = catalogName; return this; @@ -210,7 +218,7 @@ public class AnalysisInfoBuilder { } public AnalysisInfo build() { - return new AnalysisInfo(jobId, taskId, catalogName, dbName, tblName, colToPartitions, partitionNames, + return new AnalysisInfo(jobId, taskId, taskIds, catalogName, dbName, tblName, colToPartitions, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition); @@ -220,6 +228,7 @@ public class AnalysisInfoBuilder { return new AnalysisInfoBuilder() .setJobId(jobId) .setTaskId(taskId) + .setTaskIds(taskIds) .setCatalogName(catalogName) .setDbName(dbName) .setTblName(tblName) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index a7e545f98c..e549bd6b0b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -58,6 +58,7 @@ import org.apache.doris.statistics.AnalysisInfo.ScheduleType; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.logging.log4j.LogManager; @@ -439,6 +440,7 @@ public class AnalysisManager extends Daemon implements Writable { Map<String, Set<String>> colToPartitions = validateAndGetPartitions(table, columnNames, partitionNames, analysisType, analysisMode); taskInfoBuilder.setColToPartitions(colToPartitions); + taskInfoBuilder.setTaskIds(Lists.newArrayList()); return taskInfoBuilder.build(); } @@ -511,6 +513,7 @@ public class AnalysisManager extends Daemon implements Writable { AnalysisInfoBuilder indexTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo); AnalysisInfo analysisInfo = indexTaskInfoBuilder.setIndexId(indexId) .setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build(); + jobInfo.addTaskId(taskId); if (isSync) { return; } @@ -537,6 +540,7 @@ public class AnalysisManager extends Daemon implements Writable { AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId) .setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build(); analysisTasks.put(taskId, createTask(analysisInfo)); + jobInfo.addTaskId(taskId); if (isSync) { continue; } @@ -580,6 +584,7 @@ public class AnalysisManager extends Daemon implements Writable { AnalysisInfo analysisInfo = colTaskInfoBuilder.setIndexId(-1L).setLastExecTimeInMs(System.currentTimeMillis()) .setTaskId(taskId).setColName("TableRowCount").setExternalTableLevelTask(true).build(); analysisTasks.put(taskId, createTask(analysisInfo)); + jobInfo.addTaskId(taskId); if (isSync) { // For sync job, don't need to persist, return here and execute it immediately. return; @@ -708,7 +713,10 @@ public class AnalysisManager extends Daemon implements Writable { } public String getJobProgress(long jobId) { - List<AnalysisInfo> tasks = findTasks(jobId); + List<AnalysisInfo> tasks = findTasksByTaskIds(jobId); + if (tasks == null) { + return "N/A"; + } int finished = 0; int failed = 0; int inProgress = 0; @@ -921,6 +929,14 @@ public class AnalysisManager extends Daemon implements Writable { } } + public List<AnalysisInfo> findTasksByTaskIds(long jobId) { + AnalysisInfo jobInfo = analysisJobInfoMap.get(jobId); + if (jobInfo != null && jobInfo.taskIds != null) { + return jobInfo.taskIds.stream().map(id -> analysisTaskInfoMap.get(id)).collect(Collectors.toList()); + } + return null; + } + public void removeAll(List<AnalysisInfo> analysisInfos) { for (AnalysisInfo analysisInfo : analysisInfos) { analysisTaskInfoMap.remove(analysisInfo.taskId); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org