weizhengte commented on code in PR #12765:
URL: https://github.com/apache/doris/pull/12765#discussion_r976075630


##########
fe/fe-core/src/main/java/org/apache/doris/statistics/SQLStatisticsTask.java:
##########
@@ -17,47 +17,119 @@
 
 package org.apache.doris.statistics;
 
-import org.apache.doris.analysis.SelectStmt;
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.Table;
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.InvalidFormatException;
+import org.apache.doris.statistics.StatisticsTaskResult.TaskResult;
+import org.apache.doris.statistics.StatsGranularity.Granularity;
+import org.apache.doris.statistics.util.InternalQuery;
+import org.apache.doris.statistics.util.InternalQueryResult;
+import org.apache.doris.statistics.util.InternalQueryResult.ResultRow;
+import org.apache.doris.statistics.util.InternalSqlTemplate;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 
 import java.util.List;
+import java.util.Map;
 
 /**
  * A statistics task that collects statistics by executing query.
  * The results of the query will be returned as @StatisticsTaskResult.
  */
 public class SQLStatisticsTask extends StatisticsTask {
-    private SelectStmt query;
+    private String statement;
 
     public SQLStatisticsTask(long jobId, List<StatisticsDesc> statsDescs) {
         super(jobId, statsDescs);
     }
 
     @Override
     public StatisticsTaskResult call() throws Exception {
-        // TODO
-        // step1: construct query by statsDescList
-        constructQuery();
-        // step2: execute query
-        // the result should be sequence by @statsTypeList
-        List<String> queryResultList = executeQuery(query);
-        // step3: construct StatisticsTaskResult by query result
-        constructTaskResult(queryResultList);
-        return null;
+        checkStatisticsDesc();
+        List<TaskResult> taskResults = Lists.newArrayList();
+
+        for (StatisticsDesc statsDesc : statsDescs) {
+            statement = constructQuery(statsDesc);
+            TaskResult taskResult = executeQuery(statsDesc);
+            taskResults.add(taskResult);
+        }
+
+        return new StatisticsTaskResult(taskResults);
     }
 
-    protected void constructQuery() {
-        // TODO
-        // step1: construct FROM by @granularityDesc
-        // step2: construct SELECT LIST by @statsTypeList
+    protected String constructQuery(StatisticsDesc statsDesc) throws 
DdlException,
+            InvalidFormatException {
+        Map<String, String> params = getQueryParams(statsDesc);
+        List<StatsType> statsTypes = statsDesc.getStatsTypes();
+        StatsType type = statsTypes.get(0);
+
+        StatsGranularity statsGranularity = statsDesc.getStatsGranularity();
+        Granularity granularity = statsGranularity.getGranularity();
+        boolean nonPartitioned = granularity != Granularity.PARTITION;
+
+        switch (type) {

Review Comment:
   In fact, wherever possible, we already put statistics that can be collected 
together (such as MIN, MAX, NDV) into one group and collect them in a single 
SQL statement. At the same time, we also apply corresponding resource 
restrictions (such as parallelism).
   
   However, if all the aggregated metrics are put into a single SQL query and 
resources are limited, the collection time may be longer. In addition, when 
there is a large amount of data, some statistics (e.g., MAX/AVG_COL_LEN) can be 
collected by sampling, but others (e.g., MIN, MAX, NDV) need to be collected 
over the full data, so it may not be appropriate to put them all in one statement.
   
   Also, if the user only wants to quickly collect statistics for a single 
category, a combined statement would nonetheless collect all statistics.
   
   Because of the problems mentioned above, as well as some other reasons, we 
have divided the tasks according to some rules.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to