This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 352a0c2e17 [Improvement](multi catalog)Cache file system to improve list remote files performance (#21700) 352a0c2e17 is described below commit 352a0c2e179768ec3d327e105046c1a93672f5a9 Author: Jibing-Li <64681310+jibing...@users.noreply.github.com> AuthorDate: Fri Jul 14 09:59:46 2023 +0800 [Improvement](multi catalog)Cache file system to improve list remote files performance (#21700) Use file system type and Conf as key to cache remote file system. This avoids getting a new file system for each external table partition's location. The time cost for fetching 100000 partitions with 1 file for each partition is reduced to 22s from about 15 minutes. --- .../main/java/org/apache/doris/common/Config.java | 4 + .../doris/datasource/ExternalMetaCacheMgr.java | 8 ++ .../doris/datasource/hive/HiveMetaStoreCache.java | 24 ++++-- .../java/org/apache/doris/fs/FileSystemCache.java | 91 ++++++++++++++++++++++ .../org/apache/doris/fs/FileSystemFactory.java | 43 ++++++---- .../java/org/apache/doris/fs/FileSystemType.java | 25 ++++++ .../doris/planner/external/HiveScanNode.java | 2 +- .../doris/statistics/util/StatisticsUtil.java | 3 +- 8 files changed, 177 insertions(+), 23 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 120fbfffaa..e901542ea5 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1700,6 +1700,10 @@ public class Config extends ConfigBase { "Max number of hive partition values to return while list partitions, -1 means no limitation."}) public static short max_hive_list_partition_num = -1; + @ConfField(mutable = false, masterOnly = false, description = {"远程文件系统缓存的最大数量", + "Max cache number of remote file system."}) + public static long max_remote_file_system_cache_num = 100; + /** * Max 
cache loader thread-pool size. * Max thread pool size for loading external meta cache diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java index 16ffcc71f6..57f473f1ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java @@ -23,6 +23,7 @@ import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.Config; import org.apache.doris.common.ThreadPoolManager; import org.apache.doris.datasource.hive.HiveMetaStoreCache; +import org.apache.doris.fs.FileSystemCache; import org.apache.doris.planner.external.hudi.HudiPartitionMgr; import org.apache.doris.planner.external.hudi.HudiPartitionProcessor; @@ -49,6 +50,8 @@ public class ExternalMetaCacheMgr { // hudi partition manager private final HudiPartitionMgr hudiPartitionMgr; private ExecutorService executor; + // all catalogs could share the same fsCache. 
+ private FileSystemCache fsCache; public ExternalMetaCacheMgr() { executor = ThreadPoolManager.newDaemonFixedThreadPool( @@ -56,6 +59,7 @@ public class ExternalMetaCacheMgr { Config.max_external_cache_loader_thread_pool_size * 1000, "ExternalMetaCacheMgr", 120, true); hudiPartitionMgr = HudiPartitionMgr.get(executor); + fsCache = new FileSystemCache(executor); } public HiveMetaStoreCache getMetaStoreCache(HMSExternalCatalog catalog) { @@ -88,6 +92,10 @@ public class ExternalMetaCacheMgr { return hudiPartitionMgr.getPartitionProcessor(catalog); } + public FileSystemCache getFsCache() { + return fsCache; + } + public void removeCache(long catalogId) { if (cacheMap.remove(catalogId) != null) { LOG.info("remove hive metastore cache for catalog {}", catalogId); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 1b5ed54ed1..51653afc68 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -21,6 +21,7 @@ import org.apache.doris.analysis.PartitionValue; import org.apache.doris.backup.Status; import org.apache.doris.backup.Status.ErrCode; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.HdfsResource; import org.apache.doris.catalog.ListPartitionItem; import org.apache.doris.catalog.PartitionItem; @@ -37,6 +38,7 @@ import org.apache.doris.datasource.CacheException; import org.apache.doris.datasource.HMSExternalCatalog; import org.apache.doris.datasource.hive.AcidInfo.DeleteDeltaInfo; import org.apache.doris.external.hive.util.HiveUtil; +import org.apache.doris.fs.FileSystemCache; import org.apache.doris.fs.FileSystemFactory; import org.apache.doris.fs.RemoteFiles; import org.apache.doris.fs.remote.RemoteFile; @@ -357,11 +359,12 @@ public class 
HiveMetaStoreCache { // Get File Status by using FileSystem API. private FileCacheValue getFileCache(String location, InputFormat<?, ?> inputFormat, - JobConf jobConf, - List<String> partitionValues) throws UserException { + JobConf jobConf, + List<String> partitionValues) throws UserException { FileCacheValue result = new FileCacheValue(); result.setSplittable(HiveUtil.isSplittable(inputFormat, new Path(location), jobConf)); - RemoteFileSystem fs = FileSystemFactory.getByLocation(location, jobConf); + RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( + new FileSystemCache.FileSystemCacheKey(FileSystemFactory.getLocationType(location), jobConf)); try { // For Tez engine, it may generate subdirectoies for "union" query. // So there may be files and directories in the table directory at the same time. eg: @@ -493,7 +496,8 @@ public class HiveMetaStoreCache { } } - public List<FileCacheValue> getFilesByPartitions(List<HivePartition> partitions, boolean useSelfSplitter) { + public List<FileCacheValue> getFilesByPartitions(List<HivePartition> partitions, + boolean useSelfSplitter) { long start = System.currentTimeMillis(); List<FileCacheKey> keys = partitions.stream().map(p -> { FileCacheKey fileCacheKey = p.isDummyPartition() @@ -726,7 +730,7 @@ public class HiveMetaStoreCache { } public List<FileCacheValue> getFilesByTransaction(List<HivePartition> partitions, ValidWriteIdList validWriteIds, - boolean isFullAcid) { + boolean isFullAcid, long tableId) { List<FileCacheValue> fileCacheValues = Lists.newArrayList(); String remoteUser = jobConf.get(HdfsResource.HADOOP_USER_NAME); try { @@ -755,7 +759,9 @@ public class HiveMetaStoreCache { !directory.getCurrentDirectories().isEmpty() ? 
directory.getCurrentDirectories().get(0) .getPath() : null; String acidVersionPath = new Path(baseOrDeltaPath, "_orc_acid_version").toUri().toString(); - RemoteFileSystem fs = FileSystemFactory.getByLocation(baseOrDeltaPath.toUri().toString(), jobConf); + RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( + new FileSystemCache.FileSystemCacheKey( + FileSystemFactory.getLocationType(baseOrDeltaPath.toUri().toString()), jobConf)); Status status = fs.exists(acidVersionPath); if (status != Status.OK) { if (status.getErrCode() == ErrCode.NOT_FOUND) { @@ -775,7 +781,8 @@ public class HiveMetaStoreCache { List<DeleteDeltaInfo> deleteDeltas = new ArrayList<>(); for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) { String location = delta.getPath().toString(); - RemoteFileSystem fs = FileSystemFactory.getByLocation(location, jobConf); + RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( + new FileSystemCache.FileSystemCacheKey(FileSystemFactory.getLocationType(location), jobConf)); RemoteFiles locatedFiles = fs.listLocatedFiles(location, true, false); if (delta.isDeleteDelta()) { List<String> deleteDeltaFileNames = locatedFiles.files().stream().map(f -> f.getName()).filter( @@ -792,7 +799,8 @@ public class HiveMetaStoreCache { // base if (directory.getBaseDirectory() != null) { String location = directory.getBaseDirectory().toString(); - RemoteFileSystem fs = FileSystemFactory.getByLocation(location, jobConf); + RemoteFileSystem fs = Env.getCurrentEnv().getExtMetaCacheMgr().getFsCache().getRemoteFileSystem( + new FileSystemCache.FileSystemCacheKey(FileSystemFactory.getLocationType(location), jobConf)); RemoteFiles locatedFiles = fs.listLocatedFiles(location, true, false); locatedFiles.files().stream().filter( f -> f.getName().startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)) diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java 
b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java new file mode 100644 index 0000000000..aa6123d807 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.fs; + +import org.apache.doris.common.Config; +import org.apache.doris.common.util.CacheBulkLoader; +import org.apache.doris.datasource.CacheException; +import org.apache.doris.fs.remote.RemoteFileSystem; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.LoadingCache; +import org.apache.hadoop.mapred.JobConf; + +import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; + +public class FileSystemCache { + + private LoadingCache<FileSystemCacheKey, RemoteFileSystem> fileSystemCache; + + public FileSystemCache(ExecutorService executor) { + fileSystemCache = CacheBuilder.newBuilder().maximumSize(Config.max_remote_file_system_cache_num) + .expireAfterAccess(Config.external_cache_expire_time_minutes_after_access, TimeUnit.MINUTES) + .build(new CacheBulkLoader<FileSystemCacheKey, RemoteFileSystem>() { + @Override + protected ExecutorService getExecutor() { + return executor; + } + + @Override + public RemoteFileSystem load(FileSystemCacheKey key) { + return loadFileSystem(key); + } + }); + } + + private RemoteFileSystem loadFileSystem(FileSystemCacheKey key) { + return FileSystemFactory.getByType(key.type, key.conf); + } + + public RemoteFileSystem getRemoteFileSystem(FileSystemCacheKey key) { + try { + return fileSystemCache.get(key); + } catch (ExecutionException e) { + throw new CacheException("failed to get remote filesystem for type[%s]", e, key.type); + } + } + + public static class FileSystemCacheKey { + private final FileSystemType type; + private final JobConf conf; + + public FileSystemCacheKey(FileSystemType type, JobConf conf) { + this.type = type; + this.conf = conf; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof FileSystemCacheKey)) { + return false; + } + return type.equals(((FileSystemCacheKey) obj).type) && conf == 
((FileSystemCacheKey) obj).conf; + } + + @Override + public int hashCode() { + return Objects.hash(conf, type); + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java index 5ca3e1f21f..73d0c19472 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemFactory.java @@ -19,6 +19,7 @@ package org.apache.doris.fs; import org.apache.doris.analysis.StorageBackend; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.util.S3Util; import org.apache.doris.fs.remote.BrokerFileSystem; import org.apache.doris.fs.remote.RemoteFileSystem; import org.apache.doris.fs.remote.S3FileSystem; @@ -52,23 +53,39 @@ public class FileSystemFactory { } } - public static RemoteFileSystem getByLocation(String location, Configuration conf) { - // TODO: need optimize the method. the conf is converted many times. - Map<String, String> properties = new HashMap<>(); - conf.iterator().forEachRemaining(e -> properties.put(e.getKey(), e.getValue())); - if (location.startsWith(FeConstants.FS_PREFIX_S3) - || location.startsWith(FeConstants.FS_PREFIX_OSS) - || location.startsWith(FeConstants.FS_PREFIX_COS) - || location.startsWith(FeConstants.FS_PREFIX_OBS)) { - return new S3FileSystem(properties); + public static FileSystemType getLocationType(String location) { + if (S3Util.isObjStorage(location)) { + if (S3Util.isHdfsOnOssEndpoint(location)) { + // if hdfs service is enabled on oss, use hdfs lib to access oss. 
+ return FileSystemType.DFS; + } + return FileSystemType.S3; } else if (location.startsWith(FeConstants.FS_PREFIX_HDFS) || location.startsWith(FeConstants.FS_PREFIX_GFS)) { - return new DFSFileSystem(properties); + return FileSystemType.DFS; } else if (location.startsWith(FeConstants.FS_PREFIX_OFS)) { - return new OFSFileSystem(properties); + return FileSystemType.OFS; } else if (location.startsWith(FeConstants.FS_PREFIX_JFS)) { - return new JFSFileSystem(properties); + return FileSystemType.JFS; + } else { + throw new UnsupportedOperationException("Unknown file system for location: " + location); + } + } + + public static RemoteFileSystem getByType(FileSystemType type, Configuration conf) { + Map<String, String> properties = new HashMap<>(); + conf.iterator().forEachRemaining(e -> properties.put(e.getKey(), e.getValue())); + switch (type) { + case S3: + return new S3FileSystem(properties); + case DFS: + return new DFSFileSystem(properties); + case OFS: + return new OFSFileSystem(properties); + case JFS: + return new JFSFileSystem(properties); + default: + throw new IllegalStateException("Not supported file system type: " + type); } - throw new UnsupportedOperationException("Can not create file system for: " + location); } public static RemoteFileSystem getS3FileSystem(Map<String, String> properties) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemType.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemType.java new file mode 100644 index 0000000000..e3147943c2 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemType.java @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.fs; + +public enum FileSystemType { + S3, + DFS, + OFS, + JFS +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java index 52bcc67c32..d629914775 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java @@ -218,7 +218,7 @@ public class HiveScanNode extends FileQueryScanNode { } ValidWriteIdList validWriteIds = hiveTransaction.getValidWriteIds( ((HMSExternalCatalog) hmsTable.getCatalog()).getClient()); - return cache.getFilesByTransaction(partitions, validWriteIds, hiveTransaction.isFullAcid()); + return cache.getFilesByTransaction(partitions, validWriteIds, hiveTransaction.isFullAcid(), hmsTable.getId()); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index eef64ef1c9..6a52bb36b1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -577,7 +577,8 @@ public class StatisticsUtil { table.getRemoteTable().getSd().getLocation(), null)); } // Get files for all partitions. 
- List<HiveMetaStoreCache.FileCacheValue> filesByPartitions = cache.getFilesByPartitions(hivePartitions, true); + List<HiveMetaStoreCache.FileCacheValue> filesByPartitions = cache.getFilesByPartitions( + hivePartitions, true); long totalSize = 0; // Calculate the total file size. for (HiveMetaStoreCache.FileCacheValue files : filesByPartitions) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org