This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ef5ed80fe319f5843d72664e3263dd6a02ec9cd4
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Fri Jul 7 10:37:33 2023 +0800

    [improvement](multi catalog)Use getPartitionsByNames to retrieve hive 
partitions (#21562)
    
    Before, we get hive partition using HMS getPartition api. In this case, 
each partition need to call the api once. The performance is very poor when 
partition number is large. This pr use getPartitionsByNames to get multiple 
partitions in one api call.
    To get 90000 partitions, the time costing is reduced to 14s from 108s.
---
 .../doris/datasource/hive/HiveMetaStoreCache.java  | 44 +++++++++++++++++++++-
 .../datasource/hive/PooledHiveMetaStoreClient.java | 14 +++++++
 .../hadoop/hive/metastore/HiveMetaStoreClient.java | 18 +++++----
 3 files changed, 67 insertions(+), 9 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 3de4a5d1d2..0b8e9d5bc2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -20,11 +20,13 @@ package org.apache.doris.datasource.hive;
 import org.apache.doris.analysis.PartitionValue;
 import org.apache.doris.backup.Status;
 import org.apache.doris.backup.Status.ErrCode;
+import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.HdfsResource;
 import org.apache.doris.catalog.ListPartitionItem;
 import org.apache.doris.catalog.PartitionItem;
 import org.apache.doris.catalog.PartitionKey;
 import org.apache.doris.catalog.Type;
+import org.apache.doris.catalog.external.HMSExternalTable;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.FeConstants;
@@ -54,10 +56,12 @@ import com.google.common.base.Strings;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Range;
 import com.google.common.collect.RangeMap;
+import com.google.common.collect.Streams;
 import com.google.common.collect.TreeRangeMap;
 import lombok.Data;
 import org.apache.commons.lang3.math.NumberUtils;
@@ -157,7 +161,12 @@ public class HiveMetaStoreCache {
 
                     @Override
                     public HivePartition load(PartitionCacheKey key) {
-                        return loadPartitions(key);
+                        return loadPartition(key);
+                    }
+
+                    @Override
+                    public Map<PartitionCacheKey, HivePartition> 
loadAll(Iterable<? extends PartitionCacheKey> keys) {
+                        return loadPartitions(keys);
                     }
 
                 });
@@ -304,7 +313,7 @@ public class HiveMetaStoreCache {
         }
     }
 
-    private HivePartition loadPartitions(PartitionCacheKey key) {
+    private HivePartition loadPartition(PartitionCacheKey key) {
         Partition partition = catalog.getClient().getPartition(key.dbName, 
key.tblName, key.values);
         StorageDescriptor sd = partition.getSd();
         if (LOG.isDebugEnabled()) {
@@ -315,6 +324,37 @@ public class HiveMetaStoreCache {
         return new HivePartition(key.dbName, key.tblName, false, 
sd.getInputFormat(), sd.getLocation(), key.values);
     }
 
+    private Map<PartitionCacheKey, HivePartition> loadPartitions(Iterable<? 
extends PartitionCacheKey> keys) {
+        PartitionCacheKey oneKey = Iterables.get(keys, 0);
+        String dbName = oneKey.getDbName();
+        String tblName = oneKey.getTblName();
+        List<Column> partitionColumns = ((HMSExternalTable)
+                
(catalog.getDbNullable(dbName).getTableNullable(tblName))).getPartitionColumns();
+        // A partitionName is like "country=China/city=Beijing" or 
"date=2023-02-01"
+        List<String> partitionNames = Streams.stream(keys).map(key -> {
+            StringBuilder sb = new StringBuilder();
+            Preconditions.checkState(key.getValues().size() == 
partitionColumns.size());
+            for (int i = 0; i < partitionColumns.size(); i++) {
+                sb.append(partitionColumns.get(i).getName());
+                sb.append("=");
+                sb.append(key.getValues().get(i));
+                sb.append("/");
+            }
+            sb.delete(sb.length() - 1, sb.length());
+            return sb.toString();
+        }).collect(Collectors.toList());
+        List<Partition> partitions = catalog.getClient().getPartitions(dbName, 
tblName, partitionNames);
+        // Compose the return result map.
+        Map<PartitionCacheKey, HivePartition> ret = new HashMap<>();
+        for (Partition partition : partitions) {
+            StorageDescriptor sd = partition.getSd();
+            ret.put(new PartitionCacheKey(dbName, tblName, 
partition.getValues()),
+                    new HivePartition(dbName, tblName, false,
+                        sd.getInputFormat(), sd.getLocation(), 
partition.getValues()));
+        }
+        return ret;
+    }
+
     // Get File Status by using FileSystem API.
     private FileCacheValue getFileCache(String location, InputFormat<?, ?> 
inputFormat,
                                                                  JobConf 
jobConf,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java
index 9f9a1a457f..f3c2557a1d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java
@@ -152,6 +152,20 @@ public class PooledHiveMetaStoreClient {
         }
     }
 
+    public List<Partition> getPartitions(String dbName, String tblName, 
List<String> partitionNames) {
+        try (CachedClient client = getClient()) {
+            try {
+                return client.client.getPartitionsByNames(dbName, tblName, 
partitionNames);
+            } catch (Exception e) {
+                client.setThrowable(e);
+                throw e;
+            }
+        } catch (Exception e) {
+            throw new HMSClientException("failed to get partition for table %s 
in db %s with value %s", e, tblName,
+                dbName, partitionNames);
+        }
+    }
+
     public List<Partition> getPartitionsByFilter(String dbName, String 
tblName, String filter) {
         try (CachedClient client = getClient()) {
             try {
diff --git 
a/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
 
b/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 852c4a289d..f117b1fb12 100644
--- 
a/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ 
b/fe/fe-core/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -1739,13 +1739,17 @@ public class HiveMetaStoreClient implements 
IMetaStoreClient, AutoCloseable {
     return getPartitionsByNames(getDefaultCatalog(conf), db_name, tbl_name, 
part_names);
   }
 
-  @Override
-  public List<Partition> getPartitionsByNames(String catName, String db_name, 
String tbl_name,
-                                              List<String> part_names) throws 
TException {
-    List<Partition> parts =
-        client.get_partitions_by_names(prependCatalogToDbName(catName, 
db_name, conf), tbl_name, part_names);
-    return deepCopyPartitions(filterHook.filterPartitions(parts));
-  }
+    @Override
+    public List<Partition> getPartitionsByNames(String catName, String 
db_name, String tbl_name,
+                                                List<String> part_names) 
throws TException {
+        if (hiveVersion == HiveVersion.V1_0 || hiveVersion == HiveVersion.V2_0 
|| hiveVersion == HiveVersion.V2_3) {
+            return deepCopyPartitions(
+                
filterHook.filterPartitions(client.get_partitions_by_names(db_name, tbl_name, 
part_names)));
+        } else {
+            return deepCopyPartitions(filterHook.filterPartitions(
+                client.get_partitions_by_names(prependCatalogToDbName(catName, 
db_name, conf), tbl_name, part_names)));
+        }
+    }
 
   @Override
   public PartitionValuesResponse listPartitionValues(PartitionValuesRequest 
request)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to