This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ea6e4bc035 [enhance](mtmv)Optimize the speed of obtaining the last 
update time of Hive (#40169)
5ea6e4bc035 is described below

commit 5ea6e4bc03575444688dc4672d099354ba6b17e6
Author: zhangdong <493738...@qq.com>
AuthorDate: Tue Sep 3 11:43:49 2024 +0800

    [enhance](mtmv)Optimize the speed of obtaining the last update time of Hive 
(#40169)
    
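    Previously, to obtain the last update time of a Hive table, it was
    necessary to obtain the last update time of all partitions under the
    table, which required generating a large map of partition items for
    each lookup. Now the cached partition values are fetched once, and each
    partition is resolved directly by its id from the partition cache.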
---
 .../doris/datasource/hive/HMSExternalTable.java    | 79 ++++++++++++----------
 .../doris/datasource/hive/HiveMetaStoreCache.java  |  4 ++
 2 files changed, 46 insertions(+), 37 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index a9f2da13b40..8217f1c3a36 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -53,6 +53,7 @@ import com.google.common.collect.BiMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.MapUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -741,18 +742,17 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         return res;
     }
 
-    private HiveMetaStoreCache.HivePartitionValues getHivePartitionValues() {
-        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
-                .getMetaStoreCache((HMSExternalCatalog) getCatalog());
-        return cache.getPartitionValues(
-                getDbName(), getName(), getPartitionColumnTypes());
-    }
-
     @Override
     public MTMVSnapshotIf getPartitionSnapshot(String partitionName, 
MTMVRefreshContext context)
             throws AnalysisException {
-        long partitionLastModifyTime = 
getPartitionLastModifyTime(partitionName);
-        return new MTMVTimestampSnapshot(partitionLastModifyTime);
+        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
+                .getMetaStoreCache((HMSExternalCatalog) getCatalog());
+        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
cache.getPartitionValues(
+                getDbName(), getName(), getPartitionColumnTypes());
+        Long partitionId = 
getPartitionIdByNameOrAnalysisException(partitionName, hivePartitionValues);
+        HivePartition hivePartition = 
getHivePartitionByIdOrAnalysisException(partitionId,
+                hivePartitionValues, cache);
+        return new MTMVTimestampSnapshot(hivePartition.getLastModifiedTime());
     }
 
     @Override
@@ -760,45 +760,50 @@ public class HMSExternalTable extends ExternalTable 
implements MTMVRelatedTableI
         if (getPartitionType() == PartitionType.UNPARTITIONED) {
             return new MTMVMaxTimestampSnapshot(getName(), getLastDdlTime());
         }
-        String partitionName = "";
+        Long maxPartitionId = 0L;
         long maxVersionTime = 0L;
         long visibleVersionTime;
-        for (Entry<String, PartitionItem> entry : 
getAndCopyPartitionItems().entrySet()) {
-            visibleVersionTime = getPartitionLastModifyTime(entry.getKey());
+        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
+                .getMetaStoreCache((HMSExternalCatalog) getCatalog());
+        HiveMetaStoreCache.HivePartitionValues hivePartitionValues = 
cache.getPartitionValues(
+                getDbName(), getName(), getPartitionColumnTypes());
+        BiMap<Long, String> idToName = 
hivePartitionValues.getPartitionNameToIdMap().inverse();
+        if (MapUtils.isEmpty(idToName)) {
+            throw new AnalysisException("partitions is empty for : " + 
getName());
+        }
+        for (Long partitionId : idToName.keySet()) {
+            visibleVersionTime = 
getHivePartitionByIdOrAnalysisException(partitionId, hivePartitionValues,
+                    cache).getLastModifiedTime();
             if (visibleVersionTime > maxVersionTime) {
                 maxVersionTime = visibleVersionTime;
-                partitionName = entry.getKey();
+                maxPartitionId = partitionId;
             }
         }
-        return new MTMVMaxTimestampSnapshot(partitionName, maxVersionTime);
+        return new MTMVMaxTimestampSnapshot(idToName.get(maxPartitionId), 
maxVersionTime);
     }
 
-    private long getPartitionLastModifyTime(String partitionName) throws 
AnalysisException {
-        return getPartitionByName(partitionName).getLastModifiedTime();
-    }
-
-    private HivePartition getPartitionByName(String partitionName) throws 
AnalysisException {
-        PartitionItem item = getAndCopyPartitionItems().get(partitionName);
-        List<List<String>> partitionValuesList = 
transferPartitionItemToPartitionValues(item);
-        List<HivePartition> partitions = 
getPartitionsByPartitionValues(partitionValuesList);
-        if (partitions.size() != 1) {
-            throw new AnalysisException("partition not normal, size: " + 
partitions.size());
+    private Long getPartitionIdByNameOrAnalysisException(String partitionName,
+            HiveMetaStoreCache.HivePartitionValues hivePartitionValues)
+            throws AnalysisException {
+        Long partitionId = 
hivePartitionValues.getPartitionNameToIdMap().get(partitionName);
+        if (partitionId == null) {
+            throw new AnalysisException("can not find partition: " + 
partitionName);
         }
-        return partitions.get(0);
+        return partitionId;
     }
 
-    private List<HivePartition> 
getPartitionsByPartitionValues(List<List<String>> partitionValuesList) {
-        HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr()
-                .getMetaStoreCache((HMSExternalCatalog) getCatalog());
-        return cache.getAllPartitionsWithCache(getDbName(), getName(),
-                partitionValuesList);
-    }
-
-    private List<List<String>> 
transferPartitionItemToPartitionValues(PartitionItem item) {
-        List<List<String>> partitionValuesList = 
Lists.newArrayListWithCapacity(1);
-        partitionValuesList.add(
-                ((ListPartitionItem) 
item).getItems().get(0).getPartitionValuesAsStringListForHive());
-        return partitionValuesList;
+    private HivePartition getHivePartitionByIdOrAnalysisException(Long 
partitionId,
+            HiveMetaStoreCache.HivePartitionValues hivePartitionValues,
+            HiveMetaStoreCache cache) throws AnalysisException {
+        List<String> partitionValues = 
hivePartitionValues.getPartitionValuesMap().get(partitionId);
+        if (CollectionUtils.isEmpty(partitionValues)) {
+            throw new AnalysisException("can not find partitionValues: " + 
partitionId);
+        }
+        HivePartition partition = cache.getHivePartition(getDbName(), 
getName(), partitionValues);
+        if (partition == null) {
+            throw new AnalysisException("can not find partition: " + 
partitionId);
+        }
+        return partition;
     }
 
     @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index b87c14afbc8..99338fb87ad 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -521,6 +521,10 @@ public class HiveMetaStoreCache {
         return fileLists;
     }
 
+    public HivePartition getHivePartition(String dbName, String name, 
List<String> partitionValues) {
+        return partitionCache.get(new PartitionCacheKey(dbName, name, 
partitionValues));
+    }
+
     public List<HivePartition> getAllPartitionsWithCache(String dbName, String 
name,
             List<List<String>> partitionValuesList) {
         return getAllPartitions(dbName, name, partitionValuesList, true);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to