This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b4432ce577b [Feature](statistics)Support external table analyze 
partition (#24154)
b4432ce577b is described below

commit b4432ce577b9f7cd38c8202b2f492b4623bc6ce7
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Mon Sep 18 14:59:26 2023 +0800

    [Feature](statistics)Support external table analyze partition (#24154)
    
    Enable collecting partition-level stats for Hive external tables.
---
 docs/en/docs/query-acceleration/statistics.md      |   6 +-
 docs/zh-CN/docs/query-acceleration/statistics.md   |   4 +-
 fe/fe-core/src/main/cup/sql_parser.cup             |   9 ++
 .../org/apache/doris/analysis/AnalyzeTblStmt.java  |  32 ++++-
 .../org/apache/doris/analysis/PartitionNames.java  |  38 ++++-
 .../doris/catalog/external/HMSExternalTable.java   |   7 +
 .../org/apache/doris/statistics/AnalysisInfo.java  |  16 ++-
 .../doris/statistics/AnalysisInfoBuilder.java      |  48 ++++++-
 .../apache/doris/statistics/AnalysisManager.java   |  16 ++-
 .../apache/doris/statistics/ColumnStatistic.java   |   8 +-
 .../doris/statistics/ColumnStatisticBuilder.java   |   2 +-
 .../apache/doris/statistics/HMSAnalysisTask.java   | 154 +++++++++++++--------
 .../apache/doris/statistics/StatisticsCache.java   |   2 +-
 .../apache/doris/statistics/StatisticsCleaner.java |   4 +-
 .../doris/statistics/StatisticsRepository.java     |   8 +-
 .../java/org/apache/doris/statistics/StatsId.java  |   8 +-
 .../doris/statistics/util/StatisticsUtil.java      |  16 ++-
 fe/fe-core/src/main/jflex/sql_scanner.flex         |   1 +
 .../hive/test_hive_partition_statistic.out         |  87 ++++++++++++
 .../hive/test_hive_partition_statistic.groovy      |  53 +++++++
 20 files changed, 420 insertions(+), 99 deletions(-)

diff --git a/docs/en/docs/query-acceleration/statistics.md 
b/docs/en/docs/query-acceleration/statistics.md
index 4d37d278eef..9cf75cdd5ec 100644
--- a/docs/en/docs/query-acceleration/statistics.md
+++ b/docs/en/docs/query-acceleration/statistics.md
@@ -69,7 +69,7 @@ Column statistics collection syntax:
 
 ```SQL
 ANALYZE < TABLE | DATABASE table_name | db_name >
-    [ PARTITIONS (partition_name [, ...]) ]
+    [ PARTITIONS [(*) | (partition_name [, ...]) | WITH RECENT COUNT] ]
     [ (column_name [, ...]) ]
     [ [ WITH SYNC ] [ WITH INCREMENTAL ] [ WITH SAMPLE PERCENT | ROWS ] [ WITH 
PERIOD ]]
     [ PROPERTIES ("key" = "value", ...) ];
@@ -78,7 +78,7 @@ ANALYZE < TABLE | DATABASE table_name | db_name >
 Explanation:
 
 - Table_name: The target table for the specified. It can be a 
`db_name.table_name` form.
-- partition_name: The specified target partitions(for hive external table 
only)。Must be partitions exist in `table_name`. Multiple partition names are 
separated by commas. e.g. (nation=US/city=Washington)
+- partition_name: The specified target partitions (for hive external table 
only). Must be partitions that exist in `table_name`. Multiple partition names 
are separated by commas. e.g. for a single-level partition: 
PARTITIONS(`event_date=20230706`); for a multi-level partition: 
PARTITIONS(`nation=US/city=Washington`). PARTITIONS(*) specifies all 
partitions; PARTITIONS WITH RECENT 30 specifies the latest 30 partitions.
 - Column_name: The specified target column. Must be `table_name` a column that 
exists in. Multiple column names are separated by commas.
 - Sync: Synchronizes the collection of statistics. Return after collection. If 
not specified, it will be executed asynchronously and the job ID will be 
returned.
 - Incremental: Incrementally gather statistics.
@@ -673,4 +673,4 @@ When executing ANALYZE, statistical data is written to the 
internal table __inte
 
 ### ANALYZE Failure for Large Tables
 
-Due to the strict resource limitations for ANALYZE, the ANALYZE operation for 
large tables might experience timeouts or exceed the memory limit of BE. In 
such cases, it's recommended to use ANALYZE ... WITH SAMPLE.... Additionally, 
for scenarios involving dynamic partitioned tables, it's highly recommended to 
use ANALYZE ... WITH INCREMENTAL.... This statement processes only the 
partitions with updated data incrementally, avoiding redundant computations and 
improving efficiency.
\ No newline at end of file
+Due to the strict resource limitations for ANALYZE, the ANALYZE operation for 
large tables might experience timeouts or exceed the memory limit of BE. In 
such cases, it's recommended to use ANALYZE ... WITH SAMPLE.... Additionally, 
for scenarios involving dynamic partitioned tables, it's highly recommended to 
use ANALYZE ... WITH INCREMENTAL.... This statement processes only the 
partitions with updated data incrementally, avoiding redundant computations and 
improving efficiency.
diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md 
b/docs/zh-CN/docs/query-acceleration/statistics.md
index ad14c1c9504..784dc0def7c 100644
--- a/docs/zh-CN/docs/query-acceleration/statistics.md
+++ b/docs/zh-CN/docs/query-acceleration/statistics.md
@@ -66,7 +66,7 @@ Doris 查询优化器使用统计信息来确定查询最有效的执行计划
 
 ```SQL
 ANALYZE < TABLE | DATABASE table_name | db_name > 
-    [ PARTITIONS (partition_name [, ...]) ]
+    [ PARTITIONS [(*) | (partition_name [, ...]) | WITH RECENT COUNT] ]
     [ (column_name [, ...]) ]
     [ [ WITH SYNC ] [WITH INCREMENTAL] [ WITH SAMPLE PERCENT | ROWS ] [ WITH 
PERIOD ] ]
     [ PROPERTIES ("key" = "value", ...) ];
@@ -75,7 +75,7 @@ ANALYZE < TABLE | DATABASE table_name | db_name >
 其中:
 
 - table_name: 指定的的目标表。可以是  `db_name.table_name`  形式。
-- partition_name: 指定的目标分区(目前只针对Hive外表)。必须是  `table_name`  
中存在的分区,多个列名称用逗号分隔。分区名样例:event_date=20230706, nation=CN/city=Beijing
+- partition_name: 指定的目标分区(目前只针对Hive外表)。必须是  `table_name`  
中存在的分区,多个列名称用逗号分隔。分区名样例: 
单层分区PARTITIONS(`event_date=20230706`),多层分区PARTITIONS(`nation=CN/city=Beijing`)。PARTITIONS
 (*)指定所有分区,PARTITIONS WITH RECENT 100指定最新的100个分区。
 - column_name: 指定的目标列。必须是  `table_name`  中存在的列,多个列名称用逗号分隔。
 - sync:同步收集统计信息。收集完后返回。若不指定则异步执行并返回任务 ID。
 - period:周期性收集统计信息。单位为秒,指定后会定期收集相应的统计信息。
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index f0e83f09646..baec3cd18d0 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -528,6 +528,7 @@ terminal String
     KW_QUOTA,
     KW_RANDOM,
     KW_RANGE,
+    KW_RECENT,
     KW_READ,
     KW_REBALANCE,
     KW_RECOVER,
@@ -5900,6 +5901,14 @@ partition_names ::=
     {:
         RESULT = new PartitionNames(true, Lists.newArrayList(partName));
     :}
+    | KW_PARTITIONS LPAREN STAR RPAREN
+    {:
+        RESULT = new PartitionNames(true);
+    :}
+    | KW_PARTITIONS KW_WITH KW_RECENT INTEGER_LITERAL:count
+    {:
+        RESULT = new PartitionNames(count);
+    :}
     ;
 
 opt_table_sample ::=
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
index ed5dda22498..874b83b280d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java
@@ -84,7 +84,7 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
 
     private final TableName tableName;
     private List<String> columnNames;
-    private List<String> partitionNames;
+    private PartitionNames partitionNames;
     private boolean isAllColumns;
 
     // after analyzed
@@ -97,7 +97,7 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
             AnalyzeProperties properties) {
         super(properties);
         this.tableName = tableName;
-        this.partitionNames = partitionNames == null ? null : 
partitionNames.getPartitionNames();
+        this.partitionNames = partitionNames;
         this.columnNames = columnNames;
         this.analyzeProperties = properties;
         this.isAllColumns = columnNames == null;
@@ -240,14 +240,34 @@ public class AnalyzeTblStmt extends AnalyzeStmt {
     }
 
     public Set<String> getPartitionNames() {
-        Set<String> partitions = partitionNames == null ? 
table.getPartitionNames() : Sets.newHashSet(partitionNames);
-        if (isSamplingPartition()) {
-            int partNum = 
ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
-            partitions = 
partitions.stream().limit(partNum).collect(Collectors.toSet());
+        if (partitionNames == null || partitionNames.getPartitionNames() == 
null) {
+            return null;
         }
+        Set<String> partitions = Sets.newHashSet();
+        partitions.addAll(partitionNames.getPartitionNames());
+        /*
+            if (isSamplingPartition()) {
+                int partNum = 
ConnectContext.get().getSessionVariable().getExternalTableAnalyzePartNum();
+                partitions = 
partitions.stream().limit(partNum).collect(Collectors.toSet());
+            }
+        */
         return partitions;
     }
 
+    public boolean isAllPartitions() {
+        if (partitionNames == null) {
+            return false;
+        }
+        return partitionNames.isAllPartitions();
+    }
+
+    public long getPartitionCount() {
+        if (partitionNames == null) {
+            return 0;
+        }
+        return partitionNames.getCount();
+    }
+
     public boolean isPartitionOnly() {
         return partitionNames != null;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionNames.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionNames.java
index 1140dfc6777..ca26a2978e0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionNames.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionNames.java
@@ -48,15 +48,37 @@ public class PartitionNames implements ParseNode, Writable {
     // true if these partitions are temp partitions
     @SerializedName(value = "isTemp")
     private final boolean isTemp;
+    private final boolean allPartitions;
+    private final long count;
+    // Default partition count to collect statistic for external table.
+    private static final long DEFAULT_PARTITION_COUNT = 100;
 
     public PartitionNames(boolean isTemp, List<String> partitionNames) {
         this.partitionNames = partitionNames;
         this.isTemp = isTemp;
+        this.allPartitions = false;
+        this.count = 0;
     }
 
     public PartitionNames(PartitionNames other) {
         this.partitionNames = Lists.newArrayList(other.partitionNames);
         this.isTemp = other.isTemp;
+        this.allPartitions = other.allPartitions;
+        this.count = 0;
+    }
+
+    public PartitionNames(boolean allPartitions) {
+        this.partitionNames = null;
+        this.isTemp = false;
+        this.allPartitions = allPartitions;
+        this.count = 0;
+    }
+
+    public PartitionNames(long partitionCount) {
+        this.partitionNames = null;
+        this.isTemp = false;
+        this.allPartitions = false;
+        this.count = partitionCount;
     }
 
     public List<String> getPartitionNames() {
@@ -67,9 +89,23 @@ public class PartitionNames implements ParseNode, Writable {
         return isTemp;
     }
 
+    public boolean isAllPartitions() {
+        return allPartitions;
+    }
+
+    public long getCount() {
+        return count;
+    }
+
     @Override
     public void analyze(Analyzer analyzer) throws AnalysisException {
-        if (partitionNames.isEmpty()) {
+        if (allPartitions && count > 0) {
+            throw new AnalysisException("All partition and partition count 
couldn't be set at the same time.");
+        }
+        if (allPartitions || count > 0) {
+            return;
+        }
+        if (partitionNames == null || partitionNames.isEmpty()) {
             throw new AnalysisException("No partition specified in partition 
lists");
         }
         // check if partition name is not empty string
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
index c1de1ea98d7..d691c0c6e5f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/HMSExternalTable.java
@@ -57,6 +57,7 @@ import org.apache.iceberg.Table;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.io.IOException;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.time.LocalDate;
@@ -628,6 +629,12 @@ public class HMSExternalTable extends ExternalTable {
             builder.setMaxValue(Double.MAX_VALUE);
         }
     }
+
+    @Override
+    public void gsonPostProcess() throws IOException {
+        super.gsonPostProcess();
+        estimatedRowCount = -1;
+    }
 }
 
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 00b8c7cdaae..ec59c61fffe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -155,13 +155,19 @@ public class AnalysisInfo implements Writable {
     // True means this task is a table level task for external table.
     // This kind of task is mainly to collect the number of rows of a table.
     @SerializedName("externalTableLevelTask")
-    public boolean externalTableLevelTask;
+    public final boolean externalTableLevelTask;
 
     @SerializedName("partitionOnly")
-    public boolean partitionOnly;
+    public final boolean partitionOnly;
 
     @SerializedName("samplingPartition")
-    public boolean samplingPartition;
+    public final boolean samplingPartition;
+
+    @SerializedName("isAllPartition")
+    public final boolean isAllPartition;
+
+    @SerializedName("partitionCount")
+    public final long partitionCount;
 
     // For serialize
     @SerializedName("cronExpr")
@@ -181,7 +187,7 @@ public class AnalysisInfo implements Writable {
             int samplePercent, int sampleRows, int maxBucketNum, long 
periodTimeInMs, String message,
             long lastExecTimeInMs, long timeCostInMs, AnalysisState state, 
ScheduleType scheduleType,
             boolean isExternalTableLevelTask, boolean partitionOnly, boolean 
samplingPartition,
-            CronExpression cronExpression, boolean forceFull) {
+            boolean isAllPartition, long partitionCount, CronExpression 
cronExpression, boolean forceFull) {
         this.jobId = jobId;
         this.taskId = taskId;
         this.taskIds = taskIds;
@@ -208,6 +214,8 @@ public class AnalysisInfo implements Writable {
         this.externalTableLevelTask = isExternalTableLevelTask;
         this.partitionOnly = partitionOnly;
         this.samplingPartition = samplingPartition;
+        this.isAllPartition = isAllPartition;
+        this.partitionCount = partitionCount;
         this.cronExpression = cronExpression;
         if (cronExpression != null) {
             this.cronExprStr = cronExpression.getCronExpression();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
index 0c296ace91d..c17bbc69d91 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java
@@ -56,6 +56,8 @@ public class AnalysisInfoBuilder {
     private boolean externalTableLevelTask;
     private boolean partitionOnly;
     private boolean samplingPartition;
+    private boolean isAllPartition;
+    private long partitionCount;
 
     private CronExpression cronExpression;
 
@@ -91,6 +93,8 @@ public class AnalysisInfoBuilder {
         externalTableLevelTask = info.externalTableLevelTask;
         partitionOnly = info.partitionOnly;
         samplingPartition = info.samplingPartition;
+        isAllPartition = info.isAllPartition;
+        partitionCount = info.partitionCount;
         cronExpression = info.cronExpression;
         forceFull = info.forceFull;
     }
@@ -225,6 +229,16 @@ public class AnalysisInfoBuilder {
         return this;
     }
 
+    public AnalysisInfoBuilder setAllPartition(boolean isAllPartition) {
+        this.isAllPartition = isAllPartition;
+        return this;
+    }
+
+    public AnalysisInfoBuilder setPartitionCount(long partitionCount) {
+        this.partitionCount = partitionCount;
+        return this;
+    }
+
     public void setCronExpression(CronExpression cronExpression) {
         this.cronExpression = cronExpression;
     }
@@ -237,6 +251,38 @@ public class AnalysisInfoBuilder {
         return new AnalysisInfo(jobId, taskId, taskIds, catalogName, dbName, 
tblName, colToPartitions, partitionNames,
                 colName, indexId, jobType, analysisMode, analysisMethod, 
analysisType, samplePercent,
                 sampleRows, maxBucketNum, periodTimeInMs, message, 
lastExecTimeInMs, timeCostInMs, state, scheduleType,
-                externalTableLevelTask, partitionOnly, samplingPartition, 
cronExpression, forceFull);
+                externalTableLevelTask, partitionOnly, samplingPartition, 
isAllPartition, partitionCount,
+                cronExpression, forceFull);
+    }
+
+    public AnalysisInfoBuilder copy() {
+        return new AnalysisInfoBuilder()
+                .setJobId(jobId)
+                .setTaskId(taskId)
+                .setTaskIds(taskIds)
+                .setCatalogName(catalogName)
+                .setDbName(dbName)
+                .setTblName(tblName)
+                .setColToPartitions(colToPartitions)
+                .setColName(colName)
+                .setIndexId(indexId)
+                .setJobType(jobType)
+                .setAnalysisMode(analysisMode)
+                .setAnalysisMethod(analysisMethod)
+                .setAnalysisType(analysisType)
+                .setSamplePercent(samplePercent)
+                .setSampleRows(sampleRows)
+                .setPeriodTimeInMs(periodTimeInMs)
+                .setMaxBucketNum(maxBucketNum)
+                .setMessage(message)
+                .setLastExecTimeInMs(lastExecTimeInMs)
+                .setTimeCostInMs(timeCostInMs)
+                .setState(state)
+                .setScheduleType(scheduleType)
+                .setExternalTableLevelTask(externalTableLevelTask)
+                .setSamplingPartition(samplingPartition)
+                .setPartitionOnly(partitionOnly)
+                .setAllPartition(isAllPartition)
+                .setPartitionCount(partitionCount);
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 131f650e4db..83c3cc84e49 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -363,7 +363,7 @@ public class AnalysisManager extends Daemon implements 
Writable {
         boolean isSync = stmt.isSync();
         Map<Long, BaseAnalysisTask> analysisTaskInfos = new HashMap<>();
         createTaskForEachColumns(jobInfo, analysisTaskInfos, isSync);
-        if (stmt.isAllColumns()
+        if (!jobInfo.partitionOnly && stmt.isAllColumns()
                 && StatisticsUtil.isExternalTable(jobInfo.catalogName, 
jobInfo.dbName, jobInfo.tblName)) {
             createTableLevelTaskForExternalTable(jobInfo, analysisTaskInfos, 
isSync);
         }
@@ -453,7 +453,7 @@ public class AnalysisManager extends Daemon implements 
Writable {
         }
 
         // Get the partition granularity statistics that have been collected
-        Map<String, Set<Long>> existColAndPartsForStats = StatisticsRepository
+        Map<String, Set<String>> existColAndPartsForStats = 
StatisticsRepository
                 .fetchColAndPartsForStats(tableId);
 
         if (existColAndPartsForStats.isEmpty()) {
@@ -461,12 +461,12 @@ public class AnalysisManager extends Daemon implements 
Writable {
             return columnToPartitions;
         }
 
-        Set<Long> existPartIdsForStats = new HashSet<>();
+        Set<String> existPartIdsForStats = new HashSet<>();
         
existColAndPartsForStats.values().forEach(existPartIdsForStats::addAll);
-        Map<Long, String> idToPartition = 
StatisticsUtil.getPartitionIdToName(table);
+        Set<String> idToPartition = StatisticsUtil.getPartitionIds(table);
         // Get an invalid set of partitions (those partitions were deleted)
-        Set<Long> invalidPartIds = existPartIdsForStats.stream()
-                .filter(id -> 
!idToPartition.containsKey(id)).collect(Collectors.toSet());
+        Set<String> invalidPartIds = existPartIdsForStats.stream()
+                .filter(id -> 
!idToPartition.contains(id)).collect(Collectors.toSet());
 
         if (!invalidPartIds.isEmpty()) {
             // Delete invalid partition statistics to avoid affecting table 
statistics
@@ -496,6 +496,8 @@ public class AnalysisManager extends Daemon implements 
Writable {
         Set<String> partitionNames = stmt.getPartitionNames();
         boolean partitionOnly = stmt.isPartitionOnly();
         boolean isSamplingPartition = stmt.isSamplingPartition();
+        boolean isAllPartition = stmt.isAllPartitions();
+        long partitionCount = stmt.getPartitionCount();
         int samplePercent = stmt.getSamplePercent();
         int sampleRows = stmt.getSampleRows();
         AnalysisType analysisType = stmt.getAnalysisType();
@@ -516,6 +518,8 @@ public class AnalysisManager extends Daemon implements 
Writable {
         infoBuilder.setPartitionNames(partitionNames);
         infoBuilder.setPartitionOnly(partitionOnly);
         infoBuilder.setSamplingPartition(isSamplingPartition);
+        infoBuilder.setAllPartition(isAllPartition);
+        infoBuilder.setPartitionCount(partitionCount);
         infoBuilder.setJobType(JobType.MANUAL);
         infoBuilder.setState(AnalysisState.PENDING);
         infoBuilder.setLastExecTimeInMs(System.currentTimeMillis());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 84f8d5bfbfe..67bc308bcaa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -93,7 +93,7 @@ public class ColumnStatistic {
     public final Histogram histogram;
 
     @SerializedName("partitionIdToColStats")
-    public final Map<Long, ColumnStatistic> partitionIdToColStats = new 
HashMap<>();
+    public final Map<String, ColumnStatistic> partitionIdToColStats = new 
HashMap<>();
 
     public final String updatedTime;
 
@@ -120,7 +120,7 @@ public class ColumnStatistic {
     }
 
     public static ColumnStatistic fromResultRow(List<ResultRow> resultRows) {
-        Map<Long, ColumnStatistic> partitionIdToColStats = new HashMap<>();
+        Map<String, ColumnStatistic> partitionIdToColStats = new HashMap<>();
         ColumnStatistic columnStatistic = null;
         try {
             for (ResultRow resultRow : resultRows) {
@@ -128,7 +128,7 @@ public class ColumnStatistic {
                 if (partId == null) {
                     columnStatistic = fromResultRow(resultRow);
                 } else {
-                    partitionIdToColStats.put(Long.parseLong(partId), 
fromResultRow(resultRow));
+                    partitionIdToColStats.put(partId, 
fromResultRow(resultRow));
                 }
             }
         } catch (Throwable t) {
@@ -392,7 +392,7 @@ public class ColumnStatistic {
         return isUnKnown;
     }
 
-    public void putPartStats(long partId, ColumnStatistic columnStatistic) {
+    public void putPartStats(String partId, ColumnStatistic columnStatistic) {
         this.partitionIdToColStats.put(partId, columnStatistic);
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
index 4a3af054df7..fa4cf7ebc99 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java
@@ -40,7 +40,7 @@ public class ColumnStatisticBuilder {
 
     private ColumnStatistic original;
 
-    private Map<Long, ColumnStatistic> partitionIdToColStats = new HashMap<>();
+    private Map<String, ColumnStatistic> partitionIdToColStats = new 
HashMap<>();
 
     private String updatedTime;
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
index 512aa9982ff..973e5e76aa0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java
@@ -26,7 +26,7 @@ import org.apache.doris.qe.QueryState;
 import org.apache.doris.qe.StmtExecutor;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
-import org.apache.commons.lang3.StringUtils;
+import com.google.common.collect.Lists;
 import org.apache.commons.text.StringSubstitutor;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -35,10 +35,13 @@ import java.time.Instant;
 import java.time.LocalDateTime;
 import java.time.ZoneId;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.StringJoiner;
+import java.util.stream.Collectors;
 
 public class HMSAnalysisTask extends BaseAnalysisTask {
     private static final Logger LOG = 
LogManager.getLogger(HMSAnalysisTask.class);
@@ -48,7 +51,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
     public static final String NUM_FILES = "numFiles";
     public static final String TIMESTAMP = "transient_lastDdlTime";
 
-    private static final String ANALYZE_SQL_TABLE_TEMPLATE = "INSERT INTO "
+    private static final String ANALYZE_TABLE_TEMPLATE = "INSERT INTO "
             + "${internalDB}.${columnStatTbl}"
             + " SELECT "
             + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
@@ -67,10 +70,8 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
             + "NOW() "
             + "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
 
-    private static final String ANALYZE_SQL_PARTITION_TEMPLATE = "INSERT INTO "
-            + "${internalDB}.${columnStatTbl}"
-            + " SELECT "
-            + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS id, "
+    private static final String ANALYZE_PARTITION_TEMPLATE = " SELECT "
+            + "CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}', '-', 
${partId}) AS id, "
             + "${catalogId} AS catalog_id, "
             + "${dbId} AS db_id, "
             + "${tblId} AS tbl_id, "
@@ -83,22 +84,22 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
             + "MIN(`${colName}`) AS min, "
             + "MAX(`${colName}`) AS max, "
             + "${dataSizeFunction} AS data_size, "
-            + "NOW() "
-            + "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
+            + "NOW() FROM `${catalogName}`.`${dbName}`.`${tblName}` where ";
 
     private static final String ANALYZE_TABLE_COUNT_TEMPLATE = "SELECT 
COUNT(1) as rowCount "
             + "FROM `${catalogName}`.`${dbName}`.`${tblName}`";
 
+    // cache stats for each partition, it would be inserted into 
column_statistics in a batch.
+    private final List<List<ColStatsData>> buf = new ArrayList<>();
+
     private final boolean isTableLevelTask;
-    private final boolean isSamplingPartition;
     private final boolean isPartitionOnly;
-    private final Set<String> partitionNames;
+    private Set<String> partitionNames;
     private HMSExternalTable table;
 
     public HMSAnalysisTask(AnalysisInfo info) {
         super(info);
         isTableLevelTask = info.externalTableLevelTask;
-        isSamplingPartition = info.samplingPartition;
         isPartitionOnly = info.partitionOnly;
         partitionNames = info.partitionNames;
         table = (HMSExternalTable) tbl;
@@ -113,7 +114,7 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
     }
 
     /**
-     * Get table row count and insert the result to 
__internal_schema.table_statistics
+     * Get table row count
      */
     private void getTableStats() throws Exception {
         Map<String, String> params = buildTableStatsParams(null);
@@ -147,55 +148,15 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
         //   0 AS data_size,
         //   NOW() FROM `hive`.`tpch100`.`region`
         if (isPartitionOnly) {
-            for (String partId : partitionNames) {
-                StringBuilder sb = new StringBuilder();
-                sb.append(ANALYZE_SQL_TABLE_TEMPLATE);
-                sb.append(" where ");
-                String[] splits = partId.split("/");
-                for (int i = 0; i < splits.length; i++) {
-                    String value = splits[i].split("=")[1];
-                    splits[i] = splits[i].replace(value, "\'" + value + "\'");
-                }
-                sb.append(StringUtils.join(splits, " and "));
-                Map<String, String> params = buildTableStatsParams(partId);
-                params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
-                params.put("columnStatTbl", 
StatisticConstants.STATISTIC_TBL_NAME);
-                params.put("colName", col.getName());
-                params.put("colId", info.colName);
-                params.put("dataSizeFunction", getDataSizeFunction(col));
-                StringSubstitutor stringSubstitutor = new 
StringSubstitutor(params);
-                String sql = stringSubstitutor.replace(sb.toString());
-                executeInsertSql(sql);
+            getPartitionNames();
+            List<String> partitionAnalysisSQLs = new ArrayList<>();
+            for (String partId : this.partitionNames) {
+                partitionAnalysisSQLs.add(generateSqlForPartition(partId));
             }
+            execSQLs(partitionAnalysisSQLs);
         } else {
             StringBuilder sb = new StringBuilder();
-            sb.append(ANALYZE_SQL_TABLE_TEMPLATE);
-            if (isSamplingPartition) {
-                sb.append(" where 1=1 ");
-                String[] splitExample = 
partitionNames.stream().findFirst().get().split("/");
-                int parts = splitExample.length;
-                List<String> partNames = new ArrayList<>();
-                for (String split : splitExample) {
-                    partNames.add(split.split("=")[0]);
-                }
-                List<List<String>> valueLists = new ArrayList<>();
-                for (int i = 0; i < parts; i++) {
-                    valueLists.add(new ArrayList<>());
-                }
-                for (String partId : partitionNames) {
-                    String[] partIds = partId.split("/");
-                    for (int i = 0; i < partIds.length; i++) {
-                        valueLists.get(i).add("\'" + partIds[i].split("=")[1] 
+ "\'");
-                    }
-                }
-                for (int i = 0; i < parts; i++) {
-                    sb.append(" and ");
-                    sb.append(partNames.get(i));
-                    sb.append(" in (");
-                    sb.append(StringUtils.join(valueLists.get(i), ","));
-                    sb.append(") ");
-                }
-            }
+            sb.append(ANALYZE_TABLE_TEMPLATE);
             Map<String, String> params = buildTableStatsParams("NULL");
             params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
             params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
@@ -208,6 +169,80 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
         }
     }
 
+    private void getPartitionNames() {
+        if (partitionNames == null) {
+            if (info.isAllPartition) {
+                partitionNames = table.getPartitionNames();
+            } else if (info.partitionCount > 0) {
+                partitionNames = table.getPartitionNames().stream()
+                    .limit(info.partitionCount).collect(Collectors.toSet());
+            }
+            if (partitionNames == null || partitionNames.isEmpty()) {
+                throw new RuntimeException("Not a partition table or no 
partition specified.");
+            }
+        }
+    }
+
+    private String generateSqlForPartition(String partId) {
+        StringBuilder sb = new StringBuilder();
+        sb.append(ANALYZE_PARTITION_TEMPLATE);
+        String[] splits = partId.split("/");
+        for (int i = 0; i < splits.length; i++) {
+            String[] kv = splits[i].split("=");
+            sb.append(kv[0]);
+            sb.append("='");
+            sb.append(kv[1]);
+            sb.append("'");
+            if (i < splits.length - 1) {
+                sb.append(" and ");
+            }
+        }
+        Map<String, String> params = buildTableStatsParams(partId);
+        params.put("internalDB", FeConstants.INTERNAL_DB_NAME);
+        params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME);
+        params.put("colName", col.getName());
+        params.put("colId", info.colName);
+        params.put("dataSizeFunction", getDataSizeFunction(col));
+        return new StringSubstitutor(params).replace(sb.toString());
+    }
+
+    public void execSQLs(List<String> partitionAnalysisSQLs) throws Exception {
+        long startTime = System.currentTimeMillis();
+        LOG.debug("analyze task {} start at {}", info.toString(), new Date());
+        try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) 
{
+            List<List<String>> sqlGroups = 
Lists.partition(partitionAnalysisSQLs, StatisticConstants.UNION_ALL_LIMIT);
+            for (List<String> group : sqlGroups) {
+                if (killed) {
+                    return;
+                }
+                StringJoiner partitionCollectSQL = new StringJoiner("UNION 
ALL");
+                group.forEach(partitionCollectSQL::add);
+                stmtExecutor = new StmtExecutor(r.connectContext, 
partitionCollectSQL.toString());
+                buf.add(stmtExecutor.executeInternalQuery()
+                        
.stream().map(ColStatsData::new).collect(Collectors.toList()));
+                QueryState queryState = r.connectContext.getState();
+                if 
(queryState.getStateType().equals(QueryState.MysqlStateType.ERR)) {
+                    throw new RuntimeException(String.format("Failed to 
analyze %s.%s.%s, error: %s sql: %s",
+                        info.catalogName, info.dbName, info.colName, 
partitionCollectSQL,
+                        queryState.getErrorMessage()));
+                }
+            }
+            for (List<ColStatsData> colStatsDataList : buf) {
+                StringBuilder batchInsertSQL =
+                        new StringBuilder("INSERT INTO " + 
StatisticConstants.FULL_QUALIFIED_STATS_TBL_NAME
+                        + " VALUES ");
+                StringJoiner sj = new StringJoiner(",");
+                colStatsDataList.forEach(c -> sj.add(c.toSQL(true)));
+                batchInsertSQL.append(sj);
+                stmtExecutor = new StmtExecutor(r.connectContext, 
batchInsertSQL.toString());
+                executeWithExceptionOnFail(stmtExecutor);
+            }
+        } finally {
+            LOG.debug("analyze task {} end. cost {}ms", info, 
System.currentTimeMillis() - startTime);
+        }
+
+    }
+
     private void executeInsertSql(String sql) throws Exception {
         long startTime = System.currentTimeMillis();
         try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext()) 
{
@@ -270,7 +305,8 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
     @Override
     protected void afterExecution() {
         // Table level task doesn't need to sync any value to sync stats, it 
stores the value in metadata.
-        if (isTableLevelTask) {
+        // Partition only task doesn't need to refresh cached.
+        if (isTableLevelTask || isPartitionOnly) {
             return;
         }
         Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), 
-1, col.getName());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
index 6fdcd8ed3d6..c9b049a8cfc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java
@@ -288,7 +288,7 @@ public class StatisticsCache {
                 StatsId statsId = new StatsId(r);
                 long tblId = statsId.tblId;
                 long idxId = statsId.idxId;
-                long partId = statsId.partId;
+                String partId = statsId.partId;
                 String colId = statsId.colId;
                 ColumnStatistic partStats = ColumnStatistic.fromResultRow(r);
                 keyToColStats.get(new StatisticsCacheKey(tblId, idxId, 
colId)).putPartStats(partId, partStats);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
index 849b68fe94a..6521a8b4a59 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java
@@ -228,11 +228,11 @@ public class StatisticsCleaner extends MasterDaemon {
                         continue;
                     }
                     OlapTable olapTable = (OlapTable) t;
-                    Long partId = statsId.partId;
+                    String partId = statsId.partId;
                     if (partId == null) {
                         continue;
                     }
-                    if (!olapTable.getPartitionIds().contains(partId)) {
+                    if 
(!olapTable.getPartitionIds().contains(Long.parseLong(partId))) {
                         expiredStats.ids.add(id);
                     }
                 } catch (Exception e) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index f9b18f41e45..cd3cc67f3c9 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -179,7 +179,7 @@ public class StatisticsRepository {
         return stringJoiner.toString();
     }
 
-    public static void dropStatistics(Set<Long> partIds) throws DdlException {
+    public static void dropStatistics(Set<String> partIds) throws DdlException 
{
         dropStatisticsByPartId(partIds, StatisticConstants.STATISTIC_TBL_NAME);
     }
 
@@ -202,7 +202,7 @@ public class StatisticsRepository {
         }
     }
 
-    public static void dropStatisticsByPartId(Set<Long> partIds, String 
statsTblName) throws DdlException {
+    public static void dropStatisticsByPartId(Set<String> partIds, String 
statsTblName) throws DdlException {
         Map<String, String> params = new HashMap<>();
         String right = StatisticsUtil.joinElementsToString(partIds, ",");
         String inPredicate = String.format(" part_id IN (%s)", right);
@@ -296,14 +296,14 @@ public class StatisticsRepository {
         return StatisticsUtil.execStatisticQuery(new 
StringSubstitutor(params).replace(FETCH_STATS_FULL_NAME));
     }
 
-    public static Map<String, Set<Long>> fetchColAndPartsForStats(long tblId) {
+    public static Map<String, Set<String>> fetchColAndPartsForStats(long 
tblId) {
         Map<String, String> params = Maps.newHashMap();
         params.put("tblId", String.valueOf(tblId));
         StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
         String partSql = stringSubstitutor.replace(FETCH_STATS_PART_ID);
         List<ResultRow> resultRows = 
StatisticsUtil.execStatisticQuery(partSql);
 
-        Map<String, Set<Long>> columnToPartitions = Maps.newHashMap();
+        Map<String, Set<String>> columnToPartitions = Maps.newHashMap();
 
         resultRows.forEach(row -> {
             try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
index c7af03a8d9e..3f9b2641b75 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatsId.java
@@ -32,7 +32,7 @@ public class StatsId {
     public final String colId;
 
     // nullable
-    public final Long partId;
+    public final String partId;
 
     public StatsId(ResultRow row) {
         this.id = row.get(0);
@@ -41,7 +41,7 @@ public class StatsId {
         this.tblId = Long.parseLong(row.get(3));
         this.idxId = Long.parseLong(row.get(4));
         this.colId = row.get(5);
-        this.partId = row.get(6) == null ? null : Long.parseLong(row.get(6));
+        this.partId = row.get(6);
     }
 
     public String toSQL() {
@@ -51,8 +51,8 @@ public class StatsId {
         sj.add(String.valueOf(dbId));
         sj.add(String.valueOf(tblId));
         sj.add(String.valueOf(idxId));
-        sj.add(StatisticsUtil.quote(String.valueOf(colId)));
-        sj.add(String.valueOf(partId));
+        sj.add(StatisticsUtil.quote(colId));
+        sj.add(StatisticsUtil.quote(partId));
         return sj.toString();
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
index ed95f4bd1f3..3d2d0b17188 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java
@@ -103,6 +103,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.Set;
 import java.util.StringJoiner;
 import java.util.UUID;
 import java.util.function.Function;
@@ -439,6 +440,15 @@ public class StatisticsUtil {
                 ));
     }
 
+    public static Set<String> getPartitionIds(TableIf table) {
+        if (table instanceof OlapTable) {
+            return ((OlapTable) 
table).getPartitionIds().stream().map(String::valueOf).collect(Collectors.toSet());
+        } else if (table instanceof ExternalTable) {
+            return table.getPartitionNames();
+        }
+        throw new RuntimeException(String.format("Not supported Table %s", 
table.getClass().getName()));
+    }
+
     public static <T> String joinElementsToString(Collection<T> values, String 
delimiter) {
         StringJoiner builder = new StringJoiner(delimiter);
         values.forEach(v -> builder.add(String.valueOf(v)));
@@ -512,7 +522,11 @@ public class StatisticsUtil {
         }
         // Table parameters contains row count, simply get and return it.
         if (parameters.containsKey(NUM_ROWS)) {
-            return Long.parseLong(parameters.get(NUM_ROWS));
+            long rows = Long.parseLong(parameters.get(NUM_ROWS));
+            // HMS sometimes reports NUM_ROWS as 0 even when the table is not 
empty, so fall back to checking TOTAL_SIZE when NUM_ROWS is 0.
+            if (rows != 0) {
+                return rows;
+            }
         }
         if (!parameters.containsKey(TOTAL_SIZE) || isInit) {
             return -1;
diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex 
b/fe/fe-core/src/main/jflex/sql_scanner.flex
index 9845c2c8df5..0f8eaa5d9bc 100644
--- a/fe/fe-core/src/main/jflex/sql_scanner.flex
+++ b/fe/fe-core/src/main/jflex/sql_scanner.flex
@@ -381,6 +381,7 @@ import org.apache.doris.qe.SqlModeHelper;
         keywordMap.put("read", new Integer(SqlParserSymbols.KW_READ));
         keywordMap.put("real", new Integer(SqlParserSymbols.KW_DOUBLE));
         keywordMap.put("rebalance", new 
Integer(SqlParserSymbols.KW_REBALANCE));
+        keywordMap.put("recent", new Integer(SqlParserSymbols.KW_RECENT));
         keywordMap.put("recover", new Integer(SqlParserSymbols.KW_RECOVER));
         keywordMap.put("recycle", new Integer(SqlParserSymbols.KW_RECYCLE));
         keywordMap.put("refresh", new Integer(SqlParserSymbols.KW_REFRESH));
diff --git 
a/regression-test/data/external_table_p2/hive/test_hive_partition_statistic.out 
b/regression-test/data/external_table_p2/hive/test_hive_partition_statistic.out
new file mode 100644
index 00000000000..0e32ebe4775
--- /dev/null
+++ 
b/regression-test/data/external_table_p2/hive/test_hive_partition_statistic.out
@@ -0,0 +1,87 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !01 --
+event_day=1956-09-07   39
+event_day=2008-09-25   39
+
+-- !1 --
+event_day=2008-09-25   10000   1       0       0       0
+event_day=2008-09-25   10000   1       0       2008-09-25      2008-09-25
+event_day=2008-09-25   10000   11      0       0       10
+event_day=2008-09-25   10000   13      0       MFGR#12 MFGR#52
+event_day=2008-09-25   10000   13      0       antique wheat
+event_day=2008-09-25   10000   16      0       JUMBO BAG       WRAP PACK
+event_day=2008-09-25   10000   17      0       1       48
+event_day=2008-09-25   10000   17      0       64078   113087
+event_day=2008-09-25   10000   17      0       754035  763603
+event_day=2008-09-25   10000   17      0       ECONOMY ANODIZED BRASS  
STANDARD POLISHED TIN
+event_day=2008-09-25   10000   17      0       MFGR#1221       MFGR#528
+event_day=2008-09-25   10000   17      0       burnished drab  violet firebrick
+event_day=2008-09-25   10000   2362    0       19920101        19980802
+event_day=2008-09-25   10000   2382    0       19920203        19981027
+event_day=2008-09-25   10000   25      0       ALGERIA VIETNAM
+event_day=2008-09-25   10000   25      0       ALGERIA VIETNAM
+event_day=2008-09-25   10000   250     0       ALGERIA  0      VIETNAM  9
+event_day=2008-09-25   10000   250     0       ALGERIA  0      VIETNAM  9
+event_day=2008-09-25   10000   5       0       1-URGENT        5-LOW
+event_day=2008-09-25   10000   5       0       AFRICA  MIDDLE EAST
+event_day=2008-09-25   10000   5       0       AFRICA  MIDDLE EAST
+event_day=2008-09-25   10000   5       0       AUTOMOBILE      MACHINERY
+event_day=2008-09-25   10000   5       0       MFGR#1  MFGR#5
+event_day=2008-09-25   10000   50      0       1       50
+event_day=2008-09-25   10000   6074    0       96748   9388900
+event_day=2008-09-25   10000   7       0       1       7
+event_day=2008-09-25   10000   7       0       AIR     TRUCK
+event_day=2008-09-25   10000   845     0       106797  9423950
+event_day=2008-09-25   10000   9       0       0       8
+event_day=2008-09-25   10000   9775    0       119     2999848
+event_day=2008-09-25   10000   9794    0       107970  45833194
+event_day=2008-09-25   10000   9837    0         MGHV8XBriO    zzlztYTFMFW
+event_day=2008-09-25   10000   9846    0       Customer#000000119      
Customer#002999848
+event_day=2008-09-25   10000   9861    0       13091   599962401
+event_day=2008-09-25   10000   9879    0       10-100-337-6599 34-999-684-2905
+event_day=2008-09-25   10000   9883    0       Supplier#000000001      
Supplier#000199983
+event_day=2008-09-25   10000   9896    0         B5YhCdkaxR232CrXx     
zyxtAvAViHMabnr,1UQybiW
+event_day=2008-09-25   10000   9927    0       10-105-800-9296 34-998-982-7450
+event_day=2008-09-25   10000   9971    0       1       199983
+
+-- !2 --
+event_day=1956-09-07   10000   1       0       0       0
+event_day=1956-09-07   10000   1       0       1956-09-07      1956-09-07
+event_day=1956-09-07   10000   11      0       0       10
+event_day=1956-09-07   10000   13      0       MFGR#12 MFGR#52
+event_day=1956-09-07   10000   13      0       antique wheat
+event_day=1956-09-07   10000   16      0       JUMBO BAG       WRAP PACK
+event_day=1956-09-07   10000   17      0       1       48
+event_day=1956-09-07   10000   17      0       64078   113087
+event_day=1956-09-07   10000   17      0       754035  763603
+event_day=1956-09-07   10000   17      0       ECONOMY ANODIZED BRASS  
STANDARD POLISHED TIN
+event_day=1956-09-07   10000   17      0       MFGR#1221       MFGR#528
+event_day=1956-09-07   10000   17      0       burnished drab  violet firebrick
+event_day=1956-09-07   10000   2362    0       19920101        19980802
+event_day=1956-09-07   10000   2382    0       19920203        19981027
+event_day=1956-09-07   10000   25      0       ALGERIA VIETNAM
+event_day=1956-09-07   10000   25      0       ALGERIA VIETNAM
+event_day=1956-09-07   10000   250     0       ALGERIA  0      VIETNAM  9
+event_day=1956-09-07   10000   250     0       ALGERIA  0      VIETNAM  9
+event_day=1956-09-07   10000   5       0       1-URGENT        5-LOW
+event_day=1956-09-07   10000   5       0       AFRICA  MIDDLE EAST
+event_day=1956-09-07   10000   5       0       AFRICA  MIDDLE EAST
+event_day=1956-09-07   10000   5       0       AUTOMOBILE      MACHINERY
+event_day=1956-09-07   10000   5       0       MFGR#1  MFGR#5
+event_day=1956-09-07   10000   50      0       1       50
+event_day=1956-09-07   10000   6074    0       96748   9388900
+event_day=1956-09-07   10000   7       0       1       7
+event_day=1956-09-07   10000   7       0       AIR     TRUCK
+event_day=1956-09-07   10000   845     0       106797  9423950
+event_day=1956-09-07   10000   9       0       0       8
+event_day=1956-09-07   10000   9775    0       119     2999848
+event_day=1956-09-07   10000   9794    0       107970  45833194
+event_day=1956-09-07   10000   9837    0         MGHV8XBriO    zzlztYTFMFW
+event_day=1956-09-07   10000   9846    0       Customer#000000119      
Customer#002999848
+event_day=1956-09-07   10000   9861    0       13091   599962401
+event_day=1956-09-07   10000   9879    0       10-100-337-6599 34-999-684-2905
+event_day=1956-09-07   10000   9883    0       Supplier#000000001      
Supplier#000199983
+event_day=1956-09-07   10000   9896    0         B5YhCdkaxR232CrXx     
zyxtAvAViHMabnr,1UQybiW
+event_day=1956-09-07   10000   9927    0       10-105-800-9296 34-998-982-7450
+event_day=1956-09-07   10000   9971    0       1       199983
+
diff --git 
a/regression-test/suites/external_table_p2/hive/test_hive_partition_statistic.groovy
 
b/regression-test/suites/external_table_p2/hive/test_hive_partition_statistic.groovy
new file mode 100644
index 00000000000..9f4b462237f
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/hive/test_hive_partition_statistic.groovy
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression test: partition-level statistics collection for a Hive external table.
+// Only runs when the external Hive test environment is enabled in the config.
+suite("test_hive_partition_statistic", 
"p2,external,hive,external_remote,external_remote_hive") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_partition_statistic"
+        // Recreate the HMS catalog pointing at the external Hive metastore.
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+
+        // Analyze two specific partitions synchronously so stats are ready
+        // before the assertions below read the internal statistics table.
+        sql """use ${catalog_name}.multi_partition"""
+        sql """analyze table multi_partition_orc partitions 
(`event_day=2008-09-25`, `event_day=1956-09-07`) with sync"""
+
+        // Look up the catalog id assigned to the catalog just created.
+        def ctlId
+        def result = sql """show proc '/catalogs'"""
+
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][1] == catalog_name) {
+                ctlId = result[i][0]
+            }
+        }
+
+        // Verify per-partition stats rows exist and match the expected values.
+        qt_01 """select part_id, count(*) from 
internal.__internal_schema.column_statistics where catalog_id='$ctlId' group by 
part_id order by part_id;"""
+        order_qt_1 """select part_id, count, ndv, null_count, min, max from 
internal.__internal_schema.column_statistics where catalog_id='$ctlId' and 
part_id='event_day=2008-09-25'"""
+        order_qt_2 """select part_id, count, ndv, null_count, min, max from 
internal.__internal_schema.column_statistics where catalog_id='$ctlId' and 
part_id='event_day=1956-09-07'"""
+
+        sql """drop catalog ${catalog_name}""";
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to