This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit fe21f7428133cee6d88831be79097072c2359a87 Author: wuwenchi <wuwenchi...@hotmail.com> AuthorDate: Mon Jul 1 11:56:58 2024 +0800 [opt](hive) save hive table schema in transaction (#37008) Save the table schema, reduce the number of HMS calls, and improve write performance. --- .../org/apache/doris/datasource/hive/HMSTransaction.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java index d883b9dc786..bd0d2315c1e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java @@ -48,6 +48,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import io.airlift.concurrent.MoreFutures; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; @@ -88,6 +89,7 @@ public class HMSTransaction implements Transaction { private final Map<DatabaseTableName, Action<TableAndMore>> tableActions = new HashMap<>(); private final Map<DatabaseTableName, Map<List<String>, Action<PartitionAndMore>>> partitionActions = new HashMap<>(); + private final Map<DatabaseTableName, List<FieldSchema>> tableColumns = new HashMap<>(); private final Executor fileSystemExecutor; private HmsCommitter hmsCommitter; @@ -123,7 +125,7 @@ public class HMSTransaction implements Transaction { } } - private Set<UncompletedMpuPendingUpload> uncompletedMpuPendingUploads = new HashSet<>(); + private final Set<UncompletedMpuPendingUpload> uncompletedMpuPendingUploads = new HashSet<>(); public HMSTransaction(HiveMetadataOps hiveOps, FileSystemProvider fileSystemProvider, Executor 
fileSystemExecutor) {
         this.hiveOps = hiveOps;
@@ -241,7 +243,7 @@ public class HMSTransaction implements Transaction {
                                 Maps.newHashMap(),
                                 sd.getOutputFormat(),
                                 sd.getSerdeInfo().getSerializationLib(),
-                                hiveOps.getClient().getSchema(dbName, tbName)
+                                getTableColumns(dbName, tbName)
                         );
                         if (updateMode == TUpdateMode.OVERWRITE) {
                             dropPartition(dbName, tbName, hivePartition.getPartitionValues(), true);
@@ -396,7 +398,7 @@ public class HMSTransaction implements Transaction {
                     partition.getParameters(),
                     sd.getOutputFormat(),
                     sd.getSerdeInfo().getSerializationLib(),
-                    hiveOps.getClient().getSchema(dbName, tbName)
+                    getTableColumns(dbName, tbName)
             );
 
             partitionActionsForTable.put(
@@ -913,6 +915,11 @@ public class HMSTransaction implements Transaction {
         throw new RuntimeException("Not Found table: " + databaseName + "." + tableName);
     }
 
+    public synchronized List<FieldSchema> getTableColumns(String databaseName, String tableName) {
+        return tableColumns.computeIfAbsent(new DatabaseTableName(databaseName, tableName),
+                key -> hiveOps.getClient().getSchema(databaseName, tableName));
+    }
+
     public synchronized void finishChangingExistingTable(
             ActionType actionType,
             String databaseName,
@@ -1276,7 +1283,7 @@ public class HMSTransaction implements Transaction {
                     Maps.newHashMap(),
                     sd.getOutputFormat(),
                     sd.getSerdeInfo().getSerializationLib(),
-                    hiveOps.getClient().getSchema(dbName, tbName)
+                    getTableColumns(dbName, tbName)
             );
 
             HivePartitionWithStatistics partitionWithStats =
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org