This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit bce470372970a5539abef46947a7c855c8ddf619 Author: Mingyu Chen <morning...@163.com> AuthorDate: Fri Aug 4 16:55:10 2023 +0800 [fix](show-table-status) fix hive view NPE and external meta cache refresh issue (#22377) --- .../main/java/org/apache/doris/common/Config.java | 2 +- .../doris/datasource/hive/HiveMetaStoreCache.java | 60 ++++++++++++++++++---- .../doris/planner/external/HiveScanNode.java | 4 +- .../java/org/apache/doris/qe/ShowExecutor.java | 6 ++- .../java/org/apache/doris/qe/StmtExecutor.java | 1 - .../doris/statistics/util/StatisticsUtil.java | 13 ++++- 6 files changed, 70 insertions(+), 16 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 1a6456fefa..5b539a89a6 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2023,7 +2023,7 @@ public class Config extends ConfigBase { public static int hive_stats_partition_sample_size = 3000; @ConfField(mutable = true, masterOnly = true, description = { - "用于强制设定内表的副本数,如果改参数大于零,则用户在建表时指定的副本数将被忽略,而使用本参数设置的值。" + "用于强制设定内表的副本数,如果该参数大于零,则用户在建表时指定的副本数将被忽略,而使用本参数设置的值。" + "同时,建表语句中指定的副本标签等参数会被忽略。该参数不影响包括创建分区、修改表属性的操作。该参数建议仅用于测试环境", "Used to force the number of replicas of the internal table. 
If the config is greater than zero, " + "the number of replicas specified by the user when creating the table will be ignored, " diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index d3e4750c3b..e7e621948e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -31,6 +31,7 @@ import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.common.util.CacheBulkLoader; import org.apache.doris.common.util.S3Util; @@ -98,6 +99,7 @@ import java.util.Objects; import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -499,8 +501,18 @@ public class HiveMetaStoreCache { } } - public List<FileCacheValue> getFilesByPartitions(List<HivePartition> partitions, - boolean useSelfSplitter) { + public List<FileCacheValue> getFilesByPartitionsWithCache(List<HivePartition> partitions, + boolean useSelfSplitter) { + return getFilesByPartitions(partitions, useSelfSplitter, true); + } + + public List<FileCacheValue> getFilesByPartitionsWithoutCache(List<HivePartition> partitions, + boolean useSelfSplitter) { + return getFilesByPartitions(partitions, useSelfSplitter, false); + } + + private List<FileCacheValue> getFilesByPartitions(List<HivePartition> partitions, + boolean useSelfSplitter, boolean withCache) { long start = System.currentTimeMillis(); List<FileCacheKey> keys = 
partitions.stream().map(p -> { FileCacheKey fileCacheKey = p.isDummyPartition() @@ -513,28 +525,58 @@ public class HiveMetaStoreCache { List<FileCacheValue> fileLists; try { - fileLists = fileCacheRef.get().getAll(keys).values().asList(); + if (withCache) { + fileLists = fileCacheRef.get().getAll(keys).values().asList(); + } else { + List<Pair<FileCacheKey, Future<FileCacheValue>>> pList = keys.stream() + .map(key -> Pair.of(key, executor.submit(() -> loadFiles(key)))) + .collect(Collectors.toList()); + + fileLists = Lists.newArrayListWithExpectedSize(keys.size()); + for (Pair<FileCacheKey, Future<FileCacheValue>> p : pList) { + fileLists.add(p.second.get()); + } + } } catch (ExecutionException e) { throw new CacheException("failed to get files from partitions in catalog %s", - e, catalog.getName()); + e, catalog.getName()); + } catch (InterruptedException e) { + throw new CacheException("failed to get files from partitions in catalog %s with interrupted exception", + e, catalog.getName()); } LOG.debug("get #{} files from #{} partitions in catalog {} cost: {} ms", fileLists.stream().mapToInt(l -> l.getFiles() == null - ? (l.getSplits() == null ? 0 : l.getSplits().size()) : l.getFiles().size()).sum(), + ? (l.getSplits() == null ? 
0 : l.getSplits().size()) : l.getFiles().size()).sum(), partitions.size(), catalog.getName(), (System.currentTimeMillis() - start)); return fileLists; } - public List<HivePartition> getAllPartitions(String dbName, String name, List<List<String>> partitionValuesList) { + public List<HivePartition> getAllPartitionsWithCache(String dbName, String name, + List<List<String>> partitionValuesList) { + return getAllPartitions(dbName, name, partitionValuesList, true); + } + + public List<HivePartition> getAllPartitionsWithoutCache(String dbName, String name, + List<List<String>> partitionValuesList) { + return getAllPartitions(dbName, name, partitionValuesList, false); + } + + private List<HivePartition> getAllPartitions(String dbName, String name, List<List<String>> partitionValuesList, + boolean withCache) { long start = System.currentTimeMillis(); List<PartitionCacheKey> keys = partitionValuesList.stream() - .map(p -> new PartitionCacheKey(dbName, name, p)) - .collect(Collectors.toList()); + .map(p -> new PartitionCacheKey(dbName, name, p)) + .collect(Collectors.toList()); List<HivePartition> partitions; try { - partitions = partitionCache.getAll(keys).values().asList(); + if (withCache) { + partitions = partitionCache.getAll(keys).values().asList(); + } else { + Map<PartitionCacheKey, HivePartition> map = loadPartitions(keys); + partitions = map.values().stream().collect(Collectors.toList()); + } } catch (ExecutionException e) { throw new CacheException("failed to get partition in catalog %s", e, catalog.getName()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java index 1fe115a0b9..6aa8c3185e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java @@ -151,7 +151,7 @@ public class HiveScanNode extends FileQueryScanNode { 
partitionValuesList.add(listPartitionItem.getItems().get(0).getPartitionValuesAsStringList()); } List<HivePartition> allPartitions = - cache.getAllPartitions(hmsTable.getDbName(), hmsTable.getName(), partitionValuesList); + cache.getAllPartitionsWithCache(hmsTable.getDbName(), hmsTable.getName(), partitionValuesList); if (ConnectContext.get().getExecutor() != null) { ConnectContext.get().getExecutor().getSummaryProfile().setGetPartitionsFinishTime(); } @@ -197,7 +197,7 @@ public class HiveScanNode extends FileQueryScanNode { if (hiveTransaction != null) { fileCaches = getFileSplitByTransaction(cache, partitions); } else { - fileCaches = cache.getFilesByPartitions(partitions, useSelfSplitter); + fileCaches = cache.getFilesByPartitionsWithCache(partitions, useSelfSplitter); } if (ConnectContext.get().getExecutor() != null) { ConnectContext.get().getExecutor().getSummaryProfile().setGetPartitionFilesFinishTime(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 322dcdf500..b25b24e982 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -872,7 +872,11 @@ public class ShowExecutor { // Row_format row.add(null); // Rows - row.add(String.valueOf(table.getRowCount())); + // Use estimatedRowCount(), not getRowCount(). + // because estimatedRowCount() is an async call, it will not block, and it will call getRowCount() + // finally. So that for some table(especially external table), + // we can get the row count without blocking. 
+ row.add(String.valueOf(table.estimatedRowCount())); // Avg_row_length row.add(String.valueOf(table.getAvgRowLength())); // Data_length diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 21cf629e8f..f8d1d94a97 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -1408,7 +1408,6 @@ public class StmtExecutor { } } - Span fetchResultSpan = context.getTracer().spanBuilder("fetch result").setParent(Context.current()).startSpan(); try (Scope scope = fetchResultSpan.makeCurrent()) { while (true) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index b2e4baecee..298c872c65 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -550,6 +550,9 @@ public class StatisticsUtil { * @return estimated row count */ public static long getRowCountFromFileList(HMSExternalTable table) { + if (table.isView()) { + return 0; + } HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr() .getMetaStoreCache((HMSExternalCatalog) table.getCatalog()); List<Type> partitionColumnTypes = table.getPartitionColumnTypes(); @@ -559,6 +562,9 @@ public class StatisticsUtil { int totalPartitionSize = 1; // Get table partitions from cache. if (!partitionColumnTypes.isEmpty()) { + // It is ok to get partition values from cache, + // no need to worry that this call will invalid or refresh the cache. + // because it has enough space to keep partition info of all tables in cache. 
partitionValues = cache.getPartitionValues(table.getDbName(), table.getName(), partitionColumnTypes); } if (partitionValues != null) { @@ -579,14 +585,17 @@ public class StatisticsUtil { for (PartitionItem item : partitionItems) { partitionValuesList.add(((ListPartitionItem) item).getItems().get(0).getPartitionValuesAsStringList()); } - hivePartitions = cache.getAllPartitions(table.getDbName(), table.getName(), partitionValuesList); + // get partitions without cache, so that it will not invalidate the cache when executing + // non-query requests such as `show table status` + hivePartitions = cache.getAllPartitionsWithoutCache(table.getDbName(), table.getName(), + partitionValuesList); } else { hivePartitions.add(new HivePartition(table.getDbName(), table.getName(), true, table.getRemoteTable().getSd().getInputFormat(), table.getRemoteTable().getSd().getLocation(), null)); } // Get files for all partitions. - List<HiveMetaStoreCache.FileCacheValue> filesByPartitions = cache.getFilesByPartitions( + List<HiveMetaStoreCache.FileCacheValue> filesByPartitions = cache.getFilesByPartitionsWithoutCache( hivePartitions, true); long totalSize = 0; // Calculate the total file size. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org