[GitHub] [doris] morrySnow commented on a diff in pull request #14676: [featuer-wip](statistics) Support for collecting statistics on materialized view

GitBox Wed, 30 Nov 2022 09:47:49 -0800


morrySnow commented on code in PR #14676:
URL: https://github.com/apache/doris/pull/14676#discussion_r1036256434



##########
fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java:
##########
@@ -73,144 +70,46 @@ public class AnalyzeStmt extends DdlStmt {
 
     private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 
0L;
 
-    private final TableName optTableName;
+    public final boolean wholeTbl;

Review Comment:
   Remove this variable and add a function `analyzeAllColumns` instead.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java:
##########
@@ -145,19 +142,21 @@ public CreateTableStmt buildAnalysisJobTblStmt() throws 
UserException {
                 FeConstants.INTERNAL_DB_NAME, 
StatisticConstants.ANALYSIS_JOB_TABLE);
         List<ColumnDef> columnDefs = new ArrayList<>();
         columnDefs.add(new ColumnDef("job_id", 
TypeDef.create(PrimitiveType.BIGINT)));
+        columnDefs.add(new ColumnDef("task_id", 
TypeDef.create(PrimitiveType.BIGINT)));
         columnDefs.add(new ColumnDef("catalog_name", 
TypeDef.createVarchar(1024)));
         columnDefs.add(new ColumnDef("db_name", TypeDef.createVarchar(1024)));
         columnDefs.add(new ColumnDef("tbl_name", TypeDef.createVarchar(1024)));
         columnDefs.add(new ColumnDef("col_name", TypeDef.createVarchar(1024)));
+        columnDefs.add(new ColumnDef("index_id", 
TypeDef.create(PrimitiveType.BIGINT)));
         columnDefs.add(new ColumnDef("job_type", TypeDef.createVarchar(32)));
         columnDefs.add(new ColumnDef("analysis_type", 
TypeDef.createVarchar(32)));
         columnDefs.add(new ColumnDef("message", TypeDef.createVarchar(1024)));
         columnDefs.add(new ColumnDef("last_exec_time_in_ms", 
TypeDef.create(PrimitiveType.BIGINT)));
         columnDefs.add(new ColumnDef("state", TypeDef.createVarchar(32)));
         columnDefs.add(new ColumnDef("schedule_type", 
TypeDef.createVarchar(32)));
         String engineName = "olap";
-        KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS,
-                Lists.newArrayList("job_id"));
+        KeysDesc keysDesc = new KeysDesc(KeysType.DUP_KEYS,

Review Comment:
   So, if a task's state is updated, do we insert a new tuple into this table?



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java:
##########
@@ -997,8 +1000,11 @@ public TTableDescriptor toThrift() {
     }
 
     @Override
-    public AnalysisJob createAnalysisJob(AnalysisJobScheduler scheduler, 
AnalysisJobInfo info) {
-        return new AnalysisJob(scheduler, info);
+    public BaseAnalysisTask createAnalysisTask(AnalysisTaskScheduler 
scheduler, AnalysisTaskInfo info) {

Review Comment:
   What is the relation between job and task?



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskInfo.java:
##########
@@ -119,14 +106,14 @@ public String toString() {
         sj.add("DBName: " + dbName);
         sj.add("TableName: " + tblName);
         sj.add("ColumnName: " + colName);
-        sj.add("JobType: " + analysisType.toString());
+        sj.add("JobType: " + analysisMethod.toString());

Review Comment:
   Why is the attribute's description 'type' when the attribute's name is 'method'?



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/MVAnalysisTask.java:
##########
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.statistics;
+
+import org.apache.doris.analysis.CreateMaterializedViewStmt;
+import org.apache.doris.analysis.Expr;
+import org.apache.doris.analysis.FunctionCallExpr;
+import org.apache.doris.analysis.PartitionNames;
+import org.apache.doris.analysis.SelectListItem;
+import org.apache.doris.analysis.SelectStmt;
+import org.apache.doris.analysis.SlotRef;
+import org.apache.doris.analysis.SqlParser;
+import org.apache.doris.analysis.SqlScanner;
+import org.apache.doris.analysis.TableRef;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.MaterializedIndexMeta;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Partition;
+import org.apache.doris.common.FeConstants;
+import org.apache.doris.common.util.SqlParserUtils;
+import org.apache.doris.statistics.util.StatisticsUtil;
+
+import com.google.common.base.Preconditions;
+
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Analysis for the materialized view, only gets constructed when the 
AnalyzeStmt is not set which
+ * columns to be analyzed.
+ * TODO: Supports multi-table mv
+ */
+public class MVAnalysisTask extends BaseAnalysisTask {
+
+    private static final String ANALYZE_MV_PART = INSERT_PART_STATISTICS
+            + " FROM (${sql}) mv";
+
+    private static final String ANALYZE_MV_COL = INSERT_COL_STATISTICS
+            + "     (SELECT NDV(${colName}) AS ndv "

Review Comment:
   Add backquotes to all identifiers in the analyze SQL to avoid parse errors in
case a table or column name is a keyword.



##########
fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java:
##########
@@ -73,144 +70,46 @@ public class AnalyzeStmt extends DdlStmt {
 
     private static final Predicate<Long> DESIRED_TASK_TIMEOUT_SEC = (v) -> v > 
0L;
 
-    private final TableName optTableName;
+    public final boolean wholeTbl;
+
+    private final TableName tableName;
+
+    private TableIf table;
+
     private final PartitionNames optPartitionNames;
     private List<String> optColumnNames;
     private Map<String, String> optProperties;
 
     // after analyzed
     private long dbId;
-    private final Set<Long> tblIds = Sets.newHashSet();
+
     private final List<String> partitionNames = Lists.newArrayList();
 
-    // TODO(wzt): support multiple tables
-    public AnalyzeStmt(TableName optTableName,
+    public AnalyzeStmt(TableName tableName,
             List<String> optColumnNames,
             PartitionNames optPartitionNames,
             Map<String, String> optProperties) {
-        this.optTableName = optTableName;
+        this.tableName = tableName;
         this.optColumnNames = optColumnNames;
         this.optPartitionNames = optPartitionNames;
+        wholeTbl = CollectionUtils.isEmpty(optColumnNames);
         this.optProperties = optProperties;
     }
 
-    public long getDbId() {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The dbId must be obtained after the parsing is complete");
-        return dbId;
-    }
-
-    public Set<Long> getTblIds() {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The tblIds must be obtained after the parsing is complete");
-        return tblIds;
-    }
-
-    public Database getDb() throws AnalysisException {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The db must be obtained after the parsing is complete");
-        return 
analyzer.getEnv().getInternalCatalog().getDbOrAnalysisException(dbId);
-    }
-
-    public List<Table> getTables() throws AnalysisException {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The tables must be obtained after the parsing is complete");
-        Database db = getDb();
-        List<Table> tables = Lists.newArrayList();
-
-        db.readLock();
-        try {
-            for (Long tblId : tblIds) {
-                Table table = db.getTableOrAnalysisException(tblId);
-                tables.add(table);
-            }
-        } finally {
-            db.readUnlock();
-        }
-
-        return tables;
-    }
-
-    public List<String> getPartitionNames() {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The partitionNames must be obtained after the parsing is 
complete");
-        return partitionNames;
-    }
-
-    /**
-     * The statistics task obtains partitions and then collects partition 
statistics,
-     * we need to filter out partitions that do not have data.
-     *
-     * @return map of tableId and partitionName
-     * @throws AnalysisException not analyzed
-     */
-    public Map<Long, List<String>> getTableIdToPartitionName() throws 
AnalysisException {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The partitionIds must be obtained after the parsing is 
complete");
-        Map<Long, List<String>> tableIdToPartitionName = Maps.newHashMap();
-
-        for (Table table : getTables()) {
-            table.readLock();
-            try {
-                OlapTable olapTable = (OlapTable) table;
-                List<String> partitionNames = getPartitionNames();
-                List<String> newPartitionNames = new 
ArrayList<>(partitionNames);
-                if (newPartitionNames.isEmpty() && olapTable.isPartitioned()) {
-                    newPartitionNames.addAll(olapTable.getPartitionNames());
-                }
-                tableIdToPartitionName.put(table.getId(), newPartitionNames);
-            } finally {
-                table.readUnlock();
-            }
-        }
-        return tableIdToPartitionName;
-    }
-
-    public Map<Long, List<String>> getTableIdToColumnName() throws 
AnalysisException {
-        Preconditions.checkArgument(isAnalyzed(),
-                "The db name must be obtained after the parsing is complete");
-        Map<Long, List<String>> tableIdToColumnName = Maps.newHashMap();
-        List<Table> tables = getTables();
-        if (optColumnNames == null || optColumnNames.isEmpty()) {
-            for (Table table : tables) {
-                table.readLock();
-                try {
-                    long tblId = table.getId();
-                    List<Column> baseSchema = table.getBaseSchema();
-                    List<String> colNames = Lists.newArrayList();
-                    
baseSchema.stream().map(Column::getName).forEach(colNames::add);
-                    tableIdToColumnName.put(tblId, colNames);
-                } finally {
-                    table.readUnlock();
-                }
-            }
-        } else {
-            for (Long tblId : tblIds) {
-                tableIdToColumnName.put(tblId, optColumnNames);
-            }
-        }
-
-        return tableIdToColumnName;
-    }
-
-    public Map<String, String> getProperties() {
-        return optProperties;
-    }
-
     @Override
     public void analyze(Analyzer analyzer) throws UserException {
         super.analyze(analyzer);
 
         // step1: analyze db, table and column
-        if (optTableName != null) {
-            optTableName.analyze(analyzer);
+        if (tableName != null) {

Review Comment:
   If tableName must not be null, we should add a check in the constructor and
remove this if statement.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [doris] morrySnow commented on a diff in pull request #14676: [featuer-wip](statistics) Support for collecting statistics on materialized view

Reply via email to