morrySnow commented on code in PR #23507: URL: https://github.com/apache/doris/pull/23507#discussion_r1306851837
########## fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java: ########## @@ -127,80 +130,24 @@ public void execSQLs(List<String> partitionAnalysisSQLs, Map<String, String> par queryState.getErrorMessage())); } } - if (buf.size() > 1) { - for (List<ColStatsData> colStatsDataList : buf) { - StringBuilder batchInsertSQL = - new StringBuilder("INSERT INTO __internal_schema.column_statistics VALUES "); - StringJoiner sj = new StringJoiner(","); - colStatsDataList.forEach(c -> sj.add(c.toSQL(true))); - batchInsertSQL.append(sj.toString()); - stmtExecutor = new StmtExecutor(r.connectContext, batchInsertSQL.toString()); - executeWithExceptionOnFail(stmtExecutor); - } - params.put("type", col.getType().toString()); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE); - stmtExecutor = new StmtExecutor(r.connectContext, sql); - executeWithExceptionOnFail(stmtExecutor); - } else { - List<ColStatsData> colStatsDataList = buf.get(0); - String batchInsertSQLTemplate = "INSERT INTO __internal_schema.column_statistics " - + "SELECT id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id, row_count," - + "ndv, null_count, CAST(min AS string), CAST(max AS string), data_size, update_time FROM (" - + "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, " - + " ${catalogId} AS catalog_id, " - + " ${dbId} AS db_id, " - + " ${tblId} AS tbl_id, " - + " ${idxId} AS idx_id, " - + " '${colId}' AS col_id, " - + " NULL AS part_id, " - + " SUM(count) AS row_count," - + " SUM(null_count) AS null_count, " - + " MIN(CAST (min AS ${type})) AS min, " - + " MAX(CAST (max AS ${type})) AS max, " - + " SUM(data_size_in_bytes) AS data_size, " - + " NOW() AS update_time" - + " FROM (${partitionStatsView}) psv) t1, " - + " (SELECT NDV(`${colName}`) AS ndv " - + " FROM `${dbName}`.`${tblName}` ${sampleExpr}) t2 UNION ALL ${partitionStatsView}"; - StringJoiner sj = new StringJoiner(" 
UNION ALL "); - String selectPartitionTemplate = - "SELECT %s AS id," - + "%s AS catalog_id," - + "%s AS db_id," - + "%s AS tbl_id," - + "%s AS idx_id," - + "%s AS col_id," - + "%s AS part_id," - + "%s AS count," - + "%s AS ndv," - + "%s AS null_count," - + "%s as min," - + "%s as max," - + "%s as data_size_in_bytes," - + "%s AS update_time"; - colStatsDataList.forEach(c -> sj.add(String.format(selectPartitionTemplate, - StatisticsUtil.quote(c.statsId.id), - c.statsId.catalogId, - c.statsId.dbId, - c.statsId.tblId, - c.statsId.idxId, - StatisticsUtil.quote(c.statsId.colId), - c.statsId.partId, - c.count, - c.ndv, - c.nullCount, - c.minLit == null ? null : StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.minLit)), - c.maxLit == null ? null : StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.maxLit)), - c.dataSizeInBytes, - StatisticsUtil.quote(c.updateTime)))); - params.put("partitionStatsView", sj.toString()); - params.put("type", col.getType().toString()); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String insertSQL = stringSubstitutor.replace(batchInsertSQLTemplate); - stmtExecutor = new StmtExecutor(r.connectContext, insertSQL); + for (List<ColStatsData> colStatsDataList : buf) { + StringBuilder batchInsertSQL = + new StringBuilder("INSERT INTO __internal_schema.column_statistics VALUES "); + StringJoiner sj = new StringJoiner(","); + colStatsDataList.forEach(c -> sj.add(c.toSQL(true))); + batchInsertSQL.append(sj.toString()); + stmtExecutor = new StmtExecutor(r.connectContext, batchInsertSQL.toString()); executeWithExceptionOnFail(stmtExecutor); } + params.put("type", col.getType().toString()); + StringSubstitutor stringSubstitutor = new StringSubstitutor(params); + String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE); + stmtExecutor = new StmtExecutor(r.connectContext, sql); + executeWithExceptionOnFail(stmtExecutor); + } finally { + LOG.debug("ANALYZE TASK {} END COST TIME {}", info, + (System.currentTimeMillis() - 
startTime) + "ms"); Review Comment: ```suggestion LOG.debug("analyze task {} end. cost {}ms", info, System.currentTimeMillis() - startTime); ``` ########## fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java: ########## @@ -127,80 +130,24 @@ public void execSQLs(List<String> partitionAnalysisSQLs, Map<String, String> par queryState.getErrorMessage())); } } - if (buf.size() > 1) { - for (List<ColStatsData> colStatsDataList : buf) { - StringBuilder batchInsertSQL = - new StringBuilder("INSERT INTO __internal_schema.column_statistics VALUES "); - StringJoiner sj = new StringJoiner(","); - colStatsDataList.forEach(c -> sj.add(c.toSQL(true))); - batchInsertSQL.append(sj.toString()); - stmtExecutor = new StmtExecutor(r.connectContext, batchInsertSQL.toString()); - executeWithExceptionOnFail(stmtExecutor); - } - params.put("type", col.getType().toString()); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String sql = stringSubstitutor.replace(ANALYZE_COLUMN_SQL_TEMPLATE); - stmtExecutor = new StmtExecutor(r.connectContext, sql); - executeWithExceptionOnFail(stmtExecutor); - } else { - List<ColStatsData> colStatsDataList = buf.get(0); - String batchInsertSQLTemplate = "INSERT INTO __internal_schema.column_statistics " - + "SELECT id, catalog_id, db_id, tbl_id, idx_id, col_id, part_id, row_count," - + "ndv, null_count, CAST(min AS string), CAST(max AS string), data_size, update_time FROM (" - + "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, " - + " ${catalogId} AS catalog_id, " - + " ${dbId} AS db_id, " - + " ${tblId} AS tbl_id, " - + " ${idxId} AS idx_id, " - + " '${colId}' AS col_id, " - + " NULL AS part_id, " - + " SUM(count) AS row_count," - + " SUM(null_count) AS null_count, " - + " MIN(CAST (min AS ${type})) AS min, " - + " MAX(CAST (max AS ${type})) AS max, " - + " SUM(data_size_in_bytes) AS data_size, " - + " NOW() AS update_time" - + " FROM (${partitionStatsView}) psv) t1, " - + " (SELECT 
NDV(`${colName}`) AS ndv " - + " FROM `${dbName}`.`${tblName}` ${sampleExpr}) t2 UNION ALL ${partitionStatsView}"; - StringJoiner sj = new StringJoiner(" UNION ALL "); - String selectPartitionTemplate = - "SELECT %s AS id," - + "%s AS catalog_id," - + "%s AS db_id," - + "%s AS tbl_id," - + "%s AS idx_id," - + "%s AS col_id," - + "%s AS part_id," - + "%s AS count," - + "%s AS ndv," - + "%s AS null_count," - + "%s as min," - + "%s as max," - + "%s as data_size_in_bytes," - + "%s AS update_time"; - colStatsDataList.forEach(c -> sj.add(String.format(selectPartitionTemplate, - StatisticsUtil.quote(c.statsId.id), - c.statsId.catalogId, - c.statsId.dbId, - c.statsId.tblId, - c.statsId.idxId, - StatisticsUtil.quote(c.statsId.colId), - c.statsId.partId, - c.count, - c.ndv, - c.nullCount, - c.minLit == null ? null : StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.minLit)), - c.maxLit == null ? null : StatisticsUtil.quote(StatisticsUtil.escapeSQL(c.maxLit)), - c.dataSizeInBytes, - StatisticsUtil.quote(c.updateTime)))); - params.put("partitionStatsView", sj.toString()); - params.put("type", col.getType().toString()); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String insertSQL = stringSubstitutor.replace(batchInsertSQLTemplate); - stmtExecutor = new StmtExecutor(r.connectContext, insertSQL); + for (List<ColStatsData> colStatsDataList : buf) { + StringBuilder batchInsertSQL = + new StringBuilder("INSERT INTO __internal_schema.column_statistics VALUES "); Review Comment: `__internal_schema.column_statistics` should use `FULL_QUALIFIED_COLUMN_STATISTICS_NAME` ########## fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java: ########## @@ -2438,6 +2438,7 @@ public StatementBase setParsedStmt(StatementBase parsedStmt) { } public List<ResultRow> executeInternalQuery() { + LOG.debug("INTERNAL QUERY: " + originStmt.toString()); Review Comment: ```suggestion LOG.debug("execute internal query: {}", originStmt); ``` ########## 
fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoAnalyzer.java: ########## @@ -70,6 +70,7 @@ protected void runAfterCatalogReady() { if (!StatisticsUtil.statsTblAvailable()) { return; } + analyzePeriodically(); Review Comment: periodic analysis should run in a separate thread ########## fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java: ########## @@ -236,6 +236,7 @@ protected void executeWithExceptionOnFail(StmtExecutor stmtExecutor) throws Exce if (killed) { return; } + LOG.debug("INTERNAL SQL: " + stmtExecutor.getOriginStmt().toString()); Review Comment: ```suggestion LOG.debug("execute internal sql: {}", stmtExecutor.getOriginStmt()); ``` ########## fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java: ########## @@ -109,6 +110,8 @@ public void doExecute() throws Exception { @VisibleForTesting public void execSQLs(List<String> partitionAnalysisSQLs, Map<String, String> params) throws Exception { + long startTime = System.currentTimeMillis(); + LOG.debug("ANALYZE TASK {} START: {}", info.toString(), new Date()); Review Comment: ```suggestion LOG.debug("analyze task {} start at {}", info.toString(), new Date()); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org