morrySnow commented on code in PR #23507:
URL: https://github.com/apache/doris/pull/23507#discussion_r1306851837


##########
fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java:
##########
@@ -127,80 +130,24 @@ public void execSQLs(List<String> partitionAnalysisSQLs, 
Map<String, String> par
                             queryState.getErrorMessage()));
                 }
             }
-            if (buf.size() > 1) {
-                for (List<ColStatsData> colStatsDataList : buf) {
-                    StringBuilder batchInsertSQL =
-                            new StringBuilder("INSERT INTO 
__internal_schema.column_statistics VALUES ");
-                    StringJoiner sj = new StringJoiner(",");
-                    colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
-                    batchInsertSQL.append(sj.toString());
-                    stmtExecutor = new StmtExecutor(r.connectContext, 
batchInsertSQL.toString());
-                    executeWithExceptionOnFail(stmtExecutor);
-                }
-                params.put("type", col.getType().toString());
-                StringSubstitutor stringSubstitutor = new 
StringSubstitutor(params);
-                String sql = 
stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
-                stmtExecutor = new StmtExecutor(r.connectContext, sql);
-                executeWithExceptionOnFail(stmtExecutor);
-            } else {
-                List<ColStatsData> colStatsDataList = buf.get(0);
-                String batchInsertSQLTemplate = "INSERT INTO 
__internal_schema.column_statistics "
-                        + "SELECT  id, catalog_id, db_id, tbl_id, idx_id, 
col_id, part_id, row_count,"
-                        + "ndv, null_count, CAST(min AS string), CAST(max AS 
string), data_size, update_time FROM ("
-                        + "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', 
'${colId}') AS id, "
-                        + "         ${catalogId} AS catalog_id, "
-                        + "         ${dbId} AS db_id, "
-                        + "         ${tblId} AS tbl_id, "
-                        + "         ${idxId} AS idx_id, "
-                        + "         '${colId}' AS col_id, "
-                        + "         NULL AS part_id, "
-                        + "         SUM(count) AS row_count,"
-                        + "         SUM(null_count) AS null_count, "
-                        + "         MIN(CAST (min AS ${type})) AS min, "
-                        + "         MAX(CAST (max AS ${type})) AS max, "
-                        + "         SUM(data_size_in_bytes) AS data_size, "
-                        + "         NOW() AS update_time"
-                        + "     FROM (${partitionStatsView}) psv) t1, "
-                        + "     (SELECT NDV(`${colName}`) AS ndv "
-                        + "     FROM `${dbName}`.`${tblName}` ${sampleExpr}) 
t2 UNION ALL ${partitionStatsView}";
-                StringJoiner sj = new StringJoiner(" UNION ALL ");
-                String selectPartitionTemplate =
-                        "SELECT %s AS id,"
-                                + "%s AS catalog_id,"
-                                + "%s AS db_id,"
-                                + "%s AS tbl_id,"
-                                + "%s AS idx_id,"
-                                + "%s AS col_id,"
-                                + "%s AS part_id,"
-                                + "%s AS count,"
-                                + "%s AS ndv,"
-                                + "%s AS null_count,"
-                                + "%s as min,"
-                                + "%s as max,"
-                                + "%s as data_size_in_bytes,"
-                                + "%s AS update_time";
-                colStatsDataList.forEach(c -> 
sj.add(String.format(selectPartitionTemplate,
-                        StatisticsUtil.quote(c.statsId.id),
-                        c.statsId.catalogId,
-                        c.statsId.dbId,
-                        c.statsId.tblId,
-                        c.statsId.idxId,
-                        StatisticsUtil.quote(c.statsId.colId),
-                        c.statsId.partId,
-                        c.count,
-                        c.ndv,
-                        c.nullCount,
-                        c.minLit == null ? null : 
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.minLit)),
-                        c.maxLit == null ? null : 
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.maxLit)),
-                        c.dataSizeInBytes,
-                        StatisticsUtil.quote(c.updateTime))));
-                params.put("partitionStatsView", sj.toString());
-                params.put("type", col.getType().toString());
-                StringSubstitutor stringSubstitutor = new 
StringSubstitutor(params);
-                String insertSQL = 
stringSubstitutor.replace(batchInsertSQLTemplate);
-                stmtExecutor = new StmtExecutor(r.connectContext, insertSQL);
+            for (List<ColStatsData> colStatsDataList : buf) {
+                StringBuilder batchInsertSQL =
+                        new StringBuilder("INSERT INTO 
__internal_schema.column_statistics VALUES ");
+                StringJoiner sj = new StringJoiner(",");
+                colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
+                batchInsertSQL.append(sj.toString());
+                stmtExecutor = new StmtExecutor(r.connectContext, 
batchInsertSQL.toString());
                 executeWithExceptionOnFail(stmtExecutor);
             }
+            params.put("type", col.getType().toString());
+            StringSubstitutor stringSubstitutor = new 
StringSubstitutor(params);
+            String sql = 
stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
+            stmtExecutor = new StmtExecutor(r.connectContext, sql);
+            executeWithExceptionOnFail(stmtExecutor);
+        } finally {
+            LOG.debug("ANALYZE TASK {} END COST TIME {}", info,
+                    (System.currentTimeMillis() - startTime) + "ms");

Review Comment:
   ```suggestion
               LOG.debug("analyze task {} end. cost {}ms", info,
                       System.currentTimeMillis() - startTime);
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java:
##########
@@ -127,80 +130,24 @@ public void execSQLs(List<String> partitionAnalysisSQLs, 
Map<String, String> par
                             queryState.getErrorMessage()));
                 }
             }
-            if (buf.size() > 1) {
-                for (List<ColStatsData> colStatsDataList : buf) {
-                    StringBuilder batchInsertSQL =
-                            new StringBuilder("INSERT INTO 
__internal_schema.column_statistics VALUES ");
-                    StringJoiner sj = new StringJoiner(",");
-                    colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
-                    batchInsertSQL.append(sj.toString());
-                    stmtExecutor = new StmtExecutor(r.connectContext, 
batchInsertSQL.toString());
-                    executeWithExceptionOnFail(stmtExecutor);
-                }
-                params.put("type", col.getType().toString());
-                StringSubstitutor stringSubstitutor = new 
StringSubstitutor(params);
-                String sql = 
stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE);
-                stmtExecutor = new StmtExecutor(r.connectContext, sql);
-                executeWithExceptionOnFail(stmtExecutor);
-            } else {
-                List<ColStatsData> colStatsDataList = buf.get(0);
-                String batchInsertSQLTemplate = "INSERT INTO 
__internal_schema.column_statistics "
-                        + "SELECT  id, catalog_id, db_id, tbl_id, idx_id, 
col_id, part_id, row_count,"
-                        + "ndv, null_count, CAST(min AS string), CAST(max AS 
string), data_size, update_time FROM ("
-                        + "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', 
'${colId}') AS id, "
-                        + "         ${catalogId} AS catalog_id, "
-                        + "         ${dbId} AS db_id, "
-                        + "         ${tblId} AS tbl_id, "
-                        + "         ${idxId} AS idx_id, "
-                        + "         '${colId}' AS col_id, "
-                        + "         NULL AS part_id, "
-                        + "         SUM(count) AS row_count,"
-                        + "         SUM(null_count) AS null_count, "
-                        + "         MIN(CAST (min AS ${type})) AS min, "
-                        + "         MAX(CAST (max AS ${type})) AS max, "
-                        + "         SUM(data_size_in_bytes) AS data_size, "
-                        + "         NOW() AS update_time"
-                        + "     FROM (${partitionStatsView}) psv) t1, "
-                        + "     (SELECT NDV(`${colName}`) AS ndv "
-                        + "     FROM `${dbName}`.`${tblName}` ${sampleExpr}) 
t2 UNION ALL ${partitionStatsView}";
-                StringJoiner sj = new StringJoiner(" UNION ALL ");
-                String selectPartitionTemplate =
-                        "SELECT %s AS id,"
-                                + "%s AS catalog_id,"
-                                + "%s AS db_id,"
-                                + "%s AS tbl_id,"
-                                + "%s AS idx_id,"
-                                + "%s AS col_id,"
-                                + "%s AS part_id,"
-                                + "%s AS count,"
-                                + "%s AS ndv,"
-                                + "%s AS null_count,"
-                                + "%s as min,"
-                                + "%s as max,"
-                                + "%s as data_size_in_bytes,"
-                                + "%s AS update_time";
-                colStatsDataList.forEach(c -> 
sj.add(String.format(selectPartitionTemplate,
-                        StatisticsUtil.quote(c.statsId.id),
-                        c.statsId.catalogId,
-                        c.statsId.dbId,
-                        c.statsId.tblId,
-                        c.statsId.idxId,
-                        StatisticsUtil.quote(c.statsId.colId),
-                        c.statsId.partId,
-                        c.count,
-                        c.ndv,
-                        c.nullCount,
-                        c.minLit == null ? null : 
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.minLit)),
-                        c.maxLit == null ? null : 
StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.maxLit)),
-                        c.dataSizeInBytes,
-                        StatisticsUtil.quote(c.updateTime))));
-                params.put("partitionStatsView", sj.toString());
-                params.put("type", col.getType().toString());
-                StringSubstitutor stringSubstitutor = new 
StringSubstitutor(params);
-                String insertSQL = 
stringSubstitutor.replace(batchInsertSQLTemplate);
-                stmtExecutor = new StmtExecutor(r.connectContext, insertSQL);
+            for (List<ColStatsData> colStatsDataList : buf) {
+                StringBuilder batchInsertSQL =
+                        new StringBuilder("INSERT INTO 
__internal_schema.column_statistics VALUES ");

Review Comment:
   `__internal_schema.column_statistics` shoud use 
`FULL_QUALIFIED_COLUMN_STATISTICS_NAME`



##########
fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java:
##########
@@ -2438,6 +2438,7 @@ public StatementBase setParsedStmt(StatementBase 
parsedStmt) {
     }
 
     public List<ResultRow> executeInternalQuery() {
+        LOG.debug("INTERNAL QUERY: " + originStmt.toString());

Review Comment:
   ```suggestion
           LOG.debug("execute internal query: {}", originStmt);
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java:
##########
@@ -70,6 +70,7 @@ protected void runAfterCatalogReady() {
         if (!StatisticsUtil.statsTblAvailable()) {
             return;
         }
+        analyzePeriodically();

Review Comment:
   perroid analyze should in a serperate thread



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java:
##########
@@ -236,6 +236,7 @@ protected void executeWithExceptionOnFail(StmtExecutor 
stmtExecutor) throws Exce
         if (killed) {
             return;
         }
+        LOG.debug("INTERNAL SQL: " + stmtExecutor.getOriginStmt().toString());

Review Comment:
   ```suggestion
           LOG.debug("execute internal sql: {}", stmtExecutor.getOriginStmt());
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java:
##########
@@ -109,6 +110,8 @@ public void doExecute() throws Exception {
 
     @VisibleForTesting
     public void execSQLs(List<String> partitionAnalysisSQLs, Map<String, 
String> params) throws Exception {
+        long startTime = System.currentTimeMillis();
+        LOG.debug("ANALYZE TASK {} START: {}", info.toString(), new Date());

Review Comment:
   ```suggestion
           LOG.debug("analyze task {} start at {}", info.toString(), new 
Date());
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to