Jibing-Li commented on code in PR #24853:
URL: https://github.com/apache/doris/pull/24853#discussion_r1357925254


##########
fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java:
##########
@@ -144,27 +174,101 @@ private void getTableColumnStats() throws Exception {
         //   MAX(`r_regionkey`) AS max,
         //   0 AS data_size,
         //   NOW() FROM `hive`.`tpch100`.`region`
-        if (isPartitionOnly) {
-            getPartitionNames();
-            List<String> partitionAnalysisSQLs = new ArrayList<>();
-            for (String partId : this.partitionNames) {
-                partitionAnalysisSQLs.add(generateSqlForPartition(partId));
+        StringBuilder sb = new StringBuilder();
+        sb.append(ANALYZE_TABLE_TEMPLATE);
+        Map<String, String> params = buildStatsParams("NULL");
+        params.put("dataSizeFunction", getDataSizeFunction(col));
+        params.put("nullCountExpr", getNullCountExpression());
+        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+        String sql = stringSubstitutor.replace(sb.toString());
+        executeInsertSql(sql);
+    }
+
+    private void getPartitionColumnStats() throws Exception {
+        Set<String> partitionNames = table.getPartitionNames();
+        Set<String> ndvPartValues = Sets.newHashSet();
+        long numNulls = 0;
+        long dataSize = 0;
+        String min = null;
+        String max = null;
+        for (String names : partitionNames) {
+            // names is like "date=20230101" for one level partition
+            // and like "date=20230101/hour=12" for two level partition
+            String[] parts = names.split("/");
+            for (String part : parts) {
+                if (part.startsWith(col.getName())) {
+                    String value = HiveUtil.getHivePartitionValue(part);
+                    // HIVE_DEFAULT_PARTITION hive partition value when the partition name is not specified.
+                    if (value == null || value.isEmpty() || value.equals(HiveMetaStoreCache.HIVE_DEFAULT_PARTITION)) {
+                        numNulls += 1;
+                        continue;
+                    }
+                    ndvPartValues.add(value);
+                    dataSize += col.getType().isStringType() ? value.length() : col.getType().getColumnSize();
+                    min = updateMinValue(min, value);
+                    max = updateMaxValue(max, value);
+                }
+            }
+        }
+        // Estimate the row count. This value is inaccurate if the table stats is empty.
+        TableStats tableStatsStatus = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId());
+        long count = tableStatsStatus == null ? table.estimatedRowCount() : tableStatsStatus.rowCount;
+        dataSize = dataSize * count / partitionNames.size();
+        numNulls = numNulls * count / partitionNames.size();
+        int ndv = ndvPartValues.size();
+
+        Map<String, String> params = buildStatsParams("NULL");
+        params.put("row_count", String.valueOf(count));
+        params.put("ndv", String.valueOf(ndv));
+        params.put("null_count", String.valueOf(numNulls));
+        params.put("min", min);
+        params.put("max", max);
+        params.put("data_size", String.valueOf(dataSize));
+        StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
+        String sql = stringSubstitutor.replace(ANALYZE_PARTITION_COLUMN_TEMPLATE);
+        executeInsertSql(sql);
+    }
+
+    private String updateMinValue(String currentMin, String value) {

Review Comment:
   Added



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to