morrySnow commented on code in PR #14676: URL: https://github.com/apache/doris/pull/14676#discussion_r1036256434
########## fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java: ########## @@ -73,144 +70,46 @@ public class AnalyzeStmt extends DdlStmt { private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 0L; - private final TableName optTableName; + public final boolean wholeTbl; Review Comment: remove this var, add a function `analyzeAllColumns` ########## fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java: ########## @@ -145,19 +142,21 @@ public CreateTableStmt buildAnalysisJobTblStmt() throws UserException { FeConstants.INTERNAL_DB_NAME, StatisticConstants.ANALYSIS_JOB_TABLE); List<ColumnDef> columnDefs = new ArrayList<>(); columnDefs.add(new ColumnDef("job_id", TypeDef.create(PrimitiveType.BIGINT))); + columnDefs.add(new ColumnDef("task_id", TypeDef.create(PrimitiveType.BIGINT))); columnDefs.add(new ColumnDef("catalog_name", TypeDef.createVarchar(1024))); columnDefs.add(new ColumnDef("db_name", TypeDef.createVarchar(1024))); columnDefs.add(new ColumnDef("tbl_name", TypeDef.createVarchar(1024))); columnDefs.add(new ColumnDef("col_name", TypeDef.createVarchar(1024))); + columnDefs.add(new ColumnDef("index_id", TypeDef.create(PrimitiveType.BIGINT))); columnDefs.add(new ColumnDef("job_type", TypeDef.createVarchar(32))); columnDefs.add(new ColumnDef("analysis_type", TypeDef.createVarchar(32))); columnDefs.add(new ColumnDef("message", TypeDef.createVarchar(1024))); columnDefs.add(new ColumnDef("last_exec_time_in_ms", TypeDef.create(PrimitiveType.BIGINT))); columnDefs.add(new ColumnDef("state", TypeDef.createVarchar(32))); columnDefs.add(new ColumnDef("schedule_type", TypeDef.createVarchar(32))); String engineName = "olap"; - KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, - Lists.newArrayList("job_id")); + KeysDesc keysDesc = new KeysDesc(KeysType.DUP_KEYS, Review Comment: so, if a task' state updated, we insert a new tuple into this table? ########## fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java: ########## @@ -997,8 +1000,11 @@ public TTableDescriptor toThrift() { } @Override - public AnalysisJob createAnalysisJob(AnalysisJobScheduler scheduler, AnalysisJobInfo info) { - return new AnalysisJob(scheduler, info); + public BaseAnalysisTask createAnalysisTask(AnalysisTaskScheduler scheduler, AnalysisTaskInfo info) { Review Comment: what the relation bewteen job and task? ########## fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java: ########## @@ -119,14 +106,14 @@ public String toString() { sj.add("DBName: " + dbName); sj.add("TableName: " + tblName); sj.add("ColumnName: " + colName); - sj.add("JobType: " + analysisType.toString()); + sj.add("JobType: " + analysisMethod.toString()); Review Comment: why the attribute's desc is 'type', but the attribute's name is 'method'? ########## fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java: ########## @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.analysis.CreateMaterializedViewStmt; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.FunctionCallExpr; +import org.apache.doris.analysis.PartitionNames; +import org.apache.doris.analysis.SelectListItem; +import org.apache.doris.analysis.SelectStmt; +import org.apache.doris.analysis.SlotRef; +import org.apache.doris.analysis.SqlParser; +import org.apache.doris.analysis.SqlScanner; +import org.apache.doris.analysis.TableRef; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.MaterializedIndexMeta; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.common.FeConstants; +import org.apache.doris.common.util.SqlParserUtils; +import org.apache.doris.statistics.util.StatisticsUtil; + +import com.google.common.base.Preconditions; + +import java.io.StringReader; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * Analysis for the materialized view, only gets constructed when the AnalyzeStmt is not set which + * columns to be analyzed. + * TODO: Supports multi-table mv + */ +public class MVAnalysisTask extends BaseAnalysisTask { + + private static final String ANALYZE_MV_PART = INSERT_PART_STATISTICS + + " FROM (${sql}) mv"; + + private static final String ANALYZE_MV_COL = INSERT_COL_STATISTICS + + " (SELECT NDV(${colName}) AS ndv " Review Comment: add back quote to all identifier in analyze sql to avoid parse error in case of table or column name is keyword ########## fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java: ########## @@ -73,144 +70,46 @@ public class AnalyzeStmt extends DdlStmt { private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 0L; - private final TableName optTableName; + public final boolean wholeTbl; + + private final TableName tableName; + + private TableIf table; + private final PartitionNames optPartitionNames; private List<String> optColumnNames; private Map<String, String> optProperties; // after analyzed private long dbId; - private final Set<Long> tblIds = Sets.newHashSet(); + private final List<String> partitionNames = Lists.newArrayList(); - // TODO(wzt): support multiple tables - public AnalyzeStmt(TableName optTableName, + public AnalyzeStmt(TableName tableName, List<String> optColumnNames, PartitionNames optPartitionNames, Map<String, String> optProperties) { - this.optTableName = optTableName; + this.tableName = tableName; this.optColumnNames = optColumnNames; this.optPartitionNames = optPartitionNames; + wholeTbl = CollectionUtils.isEmpty(optColumnNames); this.optProperties = optProperties; } - public long getDbId() { - Preconditions.checkArgument(isAnalyzed(), - "The dbId must be obtained after the parsing is complete"); - return dbId; - } - - public Set<Long> getTblIds() { - Preconditions.checkArgument(isAnalyzed(), - "The tblIds must be obtained after the parsing is complete"); - return tblIds; - } - - public Database getDb() throws AnalysisException { - Preconditions.checkArgument(isAnalyzed(), - "The db must be obtained after the parsing is complete"); - return analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId); - } - - public List<Table> getTables() throws AnalysisException { - Preconditions.checkArgument(isAnalyzed(), - "The tables must be obtained after the parsing is complete"); - Database db = getDb(); - List<Table> tables = Lists.newArrayList(); - - db.readLock(); - try { - for (Long tblId : tblIds) { - Table table = db.getTableOrAnalysisException(tblId); - tables.add(table); - } - } finally { - db.readUnlock(); - } - - return tables; - } - - public List<String> getPartitionNames() { - Preconditions.checkArgument(isAnalyzed(), - "The partitionNames must be obtained after the parsing is complete"); - return partitionNames; - } - - /** - * The statistics task obtains partitions and then collects partition statistics, - * we need to filter out partitions that do not have data. - * - * @return map of tableId and partitionName - * @throws AnalysisException not analyzed - */ - public Map<Long, List<String>> getTableIdToPartitionName() throws AnalysisException { - Preconditions.checkArgument(isAnalyzed(), - "The partitionIds must be obtained after the parsing is complete"); - Map<Long, List<String>> tableIdToPartitionName = Maps.newHashMap(); - - for (Table table : getTables()) { - table.readLock(); - try { - OlapTable olapTable = (OlapTable) table; - List<String> partitionNames = getPartitionNames(); - List<String> newPartitionNames = new ArrayList<>(partitionNames); - if (newPartitionNames.isEmpty() && olapTable.isPartitioned()) { - newPartitionNames.addAll(olapTable.getPartitionNames()); - } - tableIdToPartitionName.put(table.getId(), newPartitionNames); - } finally { - table.readUnlock(); - } - } - return tableIdToPartitionName; - } - - public Map<Long, List<String>> getTableIdToColumnName() throws AnalysisException { - Preconditions.checkArgument(isAnalyzed(), - "The db name must be obtained after the parsing is complete"); - Map<Long, List<String>> tableIdToColumnName = Maps.newHashMap(); - List<Table> tables = getTables(); - if (optColumnNames == null || optColumnNames.isEmpty()) { - for (Table table : tables) { - table.readLock(); - try { - long tblId = table.getId(); - List<Column> baseSchema = table.getBaseSchema(); - List<String> colNames = Lists.newArrayList(); - baseSchema.stream().map(Column::getName).forEach(colNames::add); - tableIdToColumnName.put(tblId, colNames); - } finally { - table.readUnlock(); - } - } - } else { - for (Long tblId : tblIds) { - tableIdToColumnName.put(tblId, optColumnNames); - } - } - - return tableIdToColumnName; - } - - public Map<String, String> getProperties() { - return optProperties; - } - @Override public void analyze(Analyzer analyzer) throws UserException { super.analyze(analyzer); // step1: analyze db, table and column - if (optTableName != null) { - optTableName.analyze(analyzer); + if (tableName != null) { Review Comment: if tableName must not null. we should add a check in constructor and remove this if statement -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org