This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fe21f7428133cee6d88831be79097072c2359a87
Author: wuwenchi <wuwenchi...@hotmail.com>
AuthorDate: Mon Jul 1 11:56:58 2024 +0800

    [opt](hive) save hive table schema in transaction (#37008)
    
    Save the table schema, reduce the number of HMS calls, and improve write
    performance.
---
 .../org/apache/doris/datasource/hive/HMSTransaction.java  | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
index d883b9dc786..bd0d2315c1e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
@@ -48,6 +48,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import io.airlift.concurrent.MoreFutures;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -88,6 +89,7 @@ public class HMSTransaction implements Transaction {
     private final Map<DatabaseTableName, Action<TableAndMore>> tableActions = 
new HashMap<>();
     private final Map<DatabaseTableName, Map<List<String>, 
Action<PartitionAndMore>>>
             partitionActions = new HashMap<>();
+    private final Map<DatabaseTableName, List<FieldSchema>> tableColumns = new 
HashMap<>();
 
     private final Executor fileSystemExecutor;
     private HmsCommitter hmsCommitter;
@@ -123,7 +125,7 @@ public class HMSTransaction implements Transaction {
         }
     }
 
-    private Set<UncompletedMpuPendingUpload> uncompletedMpuPendingUploads = 
new HashSet<>();
+    private final Set<UncompletedMpuPendingUpload> 
uncompletedMpuPendingUploads = new HashSet<>();
 
     public HMSTransaction(HiveMetadataOps hiveOps, FileSystemProvider 
fileSystemProvider, Executor fileSystemExecutor) {
         this.hiveOps = hiveOps;
@@ -241,7 +243,7 @@ public class HMSTransaction implements Transaction {
                                 Maps.newHashMap(),
                                 sd.getOutputFormat(),
                                 sd.getSerdeInfo().getSerializationLib(),
-                                hiveOps.getClient().getSchema(dbName, tbName)
+                                getTableColumns(dbName, tbName)
                         );
                         if (updateMode == TUpdateMode.OVERWRITE) {
                             dropPartition(dbName, tbName, 
hivePartition.getPartitionValues(), true);
@@ -396,7 +398,7 @@ public class HMSTransaction implements Transaction {
                         partition.getParameters(),
                         sd.getOutputFormat(),
                         sd.getSerdeInfo().getSerializationLib(),
-                        hiveOps.getClient().getSchema(dbName, tbName)
+                        getTableColumns(dbName, tbName)
                 );
 
                 partitionActionsForTable.put(
@@ -913,6 +915,11 @@ public class HMSTransaction implements Transaction {
         throw new RuntimeException("Not Found table: " + databaseName + "." + 
tableName);
     }
 
+    public synchronized List<FieldSchema> getTableColumns(String databaseName, 
String tableName) {
+        return tableColumns.computeIfAbsent(new 
DatabaseTableName(databaseName, tableName),
+            key -> hiveOps.getClient().getSchema(dbName, tbName));
+    }
+
     public synchronized void finishChangingExistingTable(
             ActionType actionType,
             String databaseName,
@@ -1276,7 +1283,7 @@ public class HMSTransaction implements Transaction {
                     Maps.newHashMap(),
                     sd.getOutputFormat(),
                     sd.getSerdeInfo().getSerializationLib(),
-                    hiveOps.getClient().getSchema(dbName, tbName)
+                    getTableColumns(dbName, tbName)
             );
 
             HivePartitionWithStatistics partitionWithStats =


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to