morningman commented on code in PR #24853: URL: https://github.com/apache/doris/pull/24853#discussion_r1336086011
########## fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java: ########## @@ -144,27 +173,101 @@ private void getTableColumnStats() throws Exception { // MAX(`r_regionkey`) AS max, // 0 AS data_size, // NOW() FROM `hive`.`tpch100`.`region` - if (isPartitionOnly) { - getPartitionNames(); - List<String> partitionAnalysisSQLs = new ArrayList<>(); - for (String partId : this.partitionNames) { - partitionAnalysisSQLs.add(generateSqlForPartition(partId)); + StringBuilder sb = new StringBuilder(); + sb.append(ANALYZE_TABLE_TEMPLATE); + Map<String, String> params = buildStatsParams("NULL"); + params.put("dataSizeFunction", getDataSizeFunction(col)); + params.put("nullCountExpr", getNullCountExpression()); + StringSubstitutor stringSubstitutor = new StringSubstitutor(params); + String sql = stringSubstitutor.replace(sb.toString()); + executeInsertSql(sql); + } + + private void getPartitionColumnStats() throws Exception { + Set<String> partitionNames = table.getPartitionNames(); + Set<String> ndvPartValues = Sets.newHashSet(); + long numNulls = 0; + long dataSize = 0; + String min = null; + String max = null; + for (String names : partitionNames) { + // names is like "date=20230101" for one level partition + // and like "date=20230101/hour=12" for two level partition + String[] parts = names.split("/"); + for (String partName : parts) { + if (partName.startsWith(col.getName()) && partName.contains("=")) { + String value = partName.split("=")[1]; Review Comment: This may throw `ArrayIndexOutOfBoundsException` — e.g. for a partition name like `"date="`, `split("=")` returns a single-element array, so index `[1]` is out of bounds. 
########## fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java: ########## @@ -144,27 +173,101 @@ private void getTableColumnStats() throws Exception { // MAX(`r_regionkey`) AS max, // 0 AS data_size, // NOW() FROM `hive`.`tpch100`.`region` - if (isPartitionOnly) { - getPartitionNames(); - List<String> partitionAnalysisSQLs = new ArrayList<>(); - for (String partId : this.partitionNames) { - partitionAnalysisSQLs.add(generateSqlForPartition(partId)); + StringBuilder sb = new StringBuilder(); + sb.append(ANALYZE_TABLE_TEMPLATE); + Map<String, String> params = buildStatsParams("NULL"); + params.put("dataSizeFunction", getDataSizeFunction(col)); + params.put("nullCountExpr", getNullCountExpression()); + StringSubstitutor stringSubstitutor = new StringSubstitutor(params); + String sql = stringSubstitutor.replace(sb.toString()); + executeInsertSql(sql); + } + + private void getPartitionColumnStats() throws Exception { + Set<String> partitionNames = table.getPartitionNames(); + Set<String> ndvPartValues = Sets.newHashSet(); + long numNulls = 0; + long dataSize = 0; + String min = null; + String max = null; + for (String names : partitionNames) { + // names is like "date=20230101" for one level partition + // and like "date=20230101/hour=12" for two level partition + String[] parts = names.split("/"); Review Comment: I think we should extract this partition value parsing logic to a method. You can see `toListPartitionItem()` in `HiveMetaStoreCache`; it also handles the special characters: `// hive partition value maybe contains special characters like '=' and '/'` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org