morningman commented on code in PR #20415: URL: https://github.com/apache/doris/pull/20415#discussion_r1219717424
########## fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java: ########## @@ -345,6 +346,13 @@ public Partition getPartition(List<String> partitionValues) { return client.getPartition(dbName, name, partitionValues); } + @Override + public Set<String> getPartitionNames() { + PooledHiveMetaStoreClient client = ((HMSExternalCatalog) catalog).getClient(); Review Comment: Call `makeSureInitialized()` before getting the client. ########## fe/fe-core/src/main/cup/sql_parser.cup: ########## @@ -2522,6 +2522,16 @@ opt_col_list ::= :} ; +opt_partition_list ::= Review Comment: We already have an entry `partition_names ::=` ########## fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java: ########## @@ -308,6 +308,10 @@ private Map<String, Set<String>> validateAndGetPartitions(TableIf table, Set<Str return columnToPartitions; } + if (table instanceof HMSExternalTable) { Review Comment: Add a comment to explain this early return ########## fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java: ########## @@ -541,6 +551,9 @@ private void createTaskForExternalTable(AnalysisInfo jobInfo, AnalysisInfo analysisInfo = colTaskInfoBuilder.setIndexId(-1L) .setTaskId(taskId).setExternalTableLevelTask(true).build(); analysisTasks.put(taskId, createTask(analysisInfo)); + if (isSync) { Review Comment: Add a comment to explain this early return ########## fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java: ########## @@ -109,66 +117,140 @@ public HiveAnalysisTask(AnalysisInfo info) { */ @Override protected void getStatsBySql() throws Exception { - getTableStatsBySql(); - getPartitionStatsBySql(); - getTableColumnStatsBySql(); - getPartitionColumnStatsBySql(); + if (isTableLevelTask) { + getTableStatsBySql(); + } else { + getTableColumnStatsBySql(); + } } /** * Get table row count and insert the result to __internal_schema.table_statistics */ private void getTableStatsBySql() throws Exception { - Map<String,
String> params = buildTableStatsParams(); - List<InternalQueryResult.ResultRow> columnResult = - StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) - .replace(ANALYZE_TABLE_COUNT_TEMPLATE)); - String rowCount = columnResult.get(0).getColumnValue("rowCount"); - params.put("rowCount", rowCount); - StatisticsRepository.persistTableStats(params); + // Get table level information. Review Comment: Add an example of the final SQL in a comment ########## fe/fe-core/src/main/java/org/apache/doris/statistics/HiveAnalysisTask.java: ########## @@ -109,66 +117,140 @@ public HiveAnalysisTask(AnalysisInfo info) { */ @Override protected void getStatsBySql() throws Exception { - getTableStatsBySql(); - getPartitionStatsBySql(); - getTableColumnStatsBySql(); - getPartitionColumnStatsBySql(); + if (isTableLevelTask) { + getTableStatsBySql(); + } else { + getTableColumnStatsBySql(); + } } /** * Get table row count and insert the result to __internal_schema.table_statistics */ private void getTableStatsBySql() throws Exception { - Map<String, String> params = buildTableStatsParams(); - List<InternalQueryResult.ResultRow> columnResult = - StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) - .replace(ANALYZE_TABLE_COUNT_TEMPLATE)); - String rowCount = columnResult.get(0).getColumnValue("rowCount"); - params.put("rowCount", rowCount); - StatisticsRepository.persistTableStats(params); + // Get table level information.
+ Map<String, String> parameters = table.getRemoteTable().getParameters(); + if (isPartitionOnly) { + for (String partId : partitionNames) { + StringBuilder sb = new StringBuilder(); + sb.append(ANALYZE_TABLE_COUNT_TEMPLATE); + sb.append(" where "); + String[] splits = partId.split("/"); + for (int i = 0; i < splits.length; i++) { + String value = splits[i].split("=")[1]; + splits[i] = splits[i].replace(value, "\'" + value + "\'"); + } + sb.append(StringUtils.join(splits, " and ")); + Map<String, String> params = buildTableStatsParams(partId); + setParameterData(parameters, params); + List<InternalQueryResult.ResultRow> columnResult = + StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) + .replace(sb.toString())); + String rowCount = columnResult.get(0).getColumnValue("rowCount"); + params.put("rowCount", rowCount); + StatisticsRepository.persistTableStats(params); + } + } else { + Map<String, String> params = buildTableStatsParams("NULL"); + List<InternalQueryResult.ResultRow> columnResult = + StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) + .replace(ANALYZE_TABLE_COUNT_TEMPLATE)); + String rowCount = columnResult.get(0).getColumnValue("rowCount"); + params.put("rowCount", rowCount); + StatisticsRepository.persistTableStats(params); + } } /** * Get column statistics and insert the result to __internal_schema.column_statistics */ private void getTableColumnStatsBySql() throws Exception { - Map<String, String> params = buildTableStatsParams(); - params.put("internalDB", FeConstants.INTERNAL_DB_NAME); - params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME); - params.put("colName", col.getName()); - params.put("colId", info.colName); - params.put("dataSizeFunction", getDataSizeFunction(col)); - StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String sql = stringSubstitutor.replace(ANALYZE_SQL_TABLE_TEMPLATE); - try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) { - 
r.connectContext.getSessionVariable().disableNereidsPlannerOnce(); - this.stmtExecutor = new StmtExecutor(r.connectContext, sql); - this.stmtExecutor.execute(); + if (isPartitionOnly) { Review Comment: Add an example of the final SQL in a comment -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org