morningman commented on code in PR #25175:
URL: https://github.com/apache/doris/pull/25175#discussion_r1351812538


##########
fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java:
##########
@@ -142,6 +142,8 @@ default int getBaseColumnIdxByName(String colName) {
 
     Map<String, Set<String>> findReAnalyzeNeededPartitions();
 
+    List<Long> getChunkSizes();

Review Comment:
   Add comment for these new interface



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java:
##########
@@ -58,14 +58,15 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
             + "${idxId} AS idx_id, "
             + "'${colId}' AS col_id, "
             + "NULL AS part_id, "
-            + "${countExpr} AS row_count, "
-            + "NDV(`${colName}`) AS ndv, "
-            + "${nullCountExpr} AS null_count, "
+            + "ROUND(COUNT(1) * ${scaleFactor}) AS row_count, "
+            + "case when NDV(`${colName}`)/count('${colName}') < 0.3 then 
NDV(`${colName}`) "

Review Comment:
   this factor should be defined some where



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java:
##########
@@ -396,4 +396,9 @@ public Map<String, Set<String>> 
findReAnalyzeNeededPartitions() {
         partitions.add("Dummy Partition");
         return 
getBaseSchema().stream().collect(Collectors.toMap(Column::getName, k -> 
partitions));
     }
+
+    @Override
+    public List<Long> getChunkSizes() {
+        return null;

Review Comment:
   Better not return "null"



##########
fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java:
##########
@@ -277,38 +277,53 @@ private Map<String, String> buildTableStatsParams(String 
partId) {
         commonParams.put("catalogName", catalog.getName());
         commonParams.put("dbName", db.getFullName());
         commonParams.put("tblName", tbl.getName());
-        commonParams.put("countExpr", getCountExpression());
+        commonParams.put("sampleExpr", getSampleExpression());
+        commonParams.put("scaleFactor", getSampleScaleFactor());
         if (col != null) {
             commonParams.put("type", col.getType().toString());
         }
         commonParams.put("lastAnalyzeTimeInMs", 
String.valueOf(System.currentTimeMillis()));
         return commonParams;
     }
 
-    protected String getCountExpression() {
-        if (info.samplePercent > 0) {
-            return String.format("ROUND(COUNT(1) * 100 / %d)", 
info.samplePercent);
-        } else {
-            return "COUNT(1)";
+    protected String getSampleExpression() {
+        if (tableSample == null) {
+            return "";
         }
-    }
-
-    protected String getNullCountExpression() {
-        if (info.samplePercent > 0) {
-            return String.format("ROUND(SUM(CASE WHEN `${colName}` IS NULL 
THEN 1 ELSE 0 END) * 100 / %d)",
-                info.samplePercent);
+        if (tableSample.isPercent()) {
+            return String.format("TABLESAMPLE(%d PERCENT)", 
tableSample.getSampleValue());
         } else {
-            return "SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END)";
+            return String.format("TABLESAMPLE(%d ROWS)", 
tableSample.getSampleValue());
         }
     }
 
-    protected String getDataSizeFunction(Column column) {
-        String originFunction = super.getDataSizeFunction(column);
-        if (info.samplePercent > 0 && !isPartitionOnly) {
-            return String.format("ROUND((%s) * 100 / %d)", originFunction, 
info.samplePercent);
+    protected String getSampleScaleFactor() {

Review Comment:
   Add some comment to explain the logic in code, better give some example



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to