This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 50b81a9c13 [Fix](multi-catalog) Filter invisible files for hive table. (#21867) 50b81a9c13 is described below commit 50b81a9c13bd05446b15cebc4fcb1e5e18e6e28a Author: Xiangyu Wang <dut.xian...@gmail.com> AuthorDate: Tue Jul 18 13:08:12 2023 +0800 [Fix](multi-catalog) Filter invisible files for hive table. (#21867) In fact, Hive cannot read files whose names start with "." or "_", so we need to filter these files. --- .../doris/datasource/hive/HiveMetaStoreCache.java | 32 ++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 97a38b9864..1db50c93fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -51,7 +51,6 @@ import org.apache.doris.planner.ColumnBound; import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId; import org.apache.doris.planner.external.FileSplit; -import org.apache.doris.spi.Split; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -66,6 +65,7 @@ import com.google.common.collect.RangeMap; import com.google.common.collect.Streams; import com.google.common.collect.TreeRangeMap; import lombok.Data; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -957,7 +957,7 @@ public class HiveMetaStoreCache { // File Cache for self splitter. private final List<HiveFileStatus> files = Lists.newArrayList(); // File split cache for old splitter. This is a temp variable. 
- private final List<Split> splits = Lists.newArrayList(); + private final List<FileSplit> splits = Lists.newArrayList(); private boolean isSplittable; // The values of partitions. // e.g for file : hdfs://path/to/table/part1=a/part2=b/datafile @@ -967,17 +967,21 @@ public class HiveMetaStoreCache { private AcidInfo acidInfo; public void addFile(RemoteFile file) { - HiveFileStatus status = new HiveFileStatus(); - status.setBlockLocations(file.getBlockLocations()); - status.setPath(file.getPath()); - status.length = file.getSize(); - status.blockSize = file.getBlockSize(); - status.modificationTime = file.getModificationTime(); - files.add(status); + if (isFileVisible(file.getName())) { + HiveFileStatus status = new HiveFileStatus(); + status.setBlockLocations(file.getBlockLocations()); + status.setPath(file.getPath()); + status.length = file.getSize(); + status.blockSize = file.getBlockSize(); + status.modificationTime = file.getModificationTime(); + files.add(status); + } } - public void addSplit(Split split) { - splits.add(split); + public void addSplit(FileSplit split) { + if (isFileVisible(split.getPath().getName())) { + splits.add(split); + } } public int getValuesSize() { @@ -992,6 +996,12 @@ public class HiveMetaStoreCache { public void setAcidInfo(AcidInfo acidInfo) { this.acidInfo = acidInfo; } + + private boolean isFileVisible(String filename) { + return StringUtils.isNotEmpty(filename) + && !filename.startsWith(".") + && !filename.startsWith("_"); + } } @Data --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org