This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 50b81a9c13  [Fix](multi-catalog) Filter invisible files for hive table. (#21867)
50b81a9c13 is described below

commit 50b81a9c13bd05446b15cebc4fcb1e5e18e6e28a
Author: Xiangyu Wang <dut.xian...@gmail.com>
AuthorDate: Tue Jul 18 13:08:12 2023 +0800

     [Fix](multi-catalog) Filter invisible files for hive table. (#21867)
    
    Hive does not read files whose names start with "." or "_", so we need to
    filter out these files.
---
 .../doris/datasource/hive/HiveMetaStoreCache.java  | 32 ++++++++++++++--------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 97a38b9864..1db50c93fb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -51,7 +51,6 @@ import org.apache.doris.planner.ColumnBound;
 import org.apache.doris.planner.ListPartitionPrunerV2;
 import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
 import org.apache.doris.planner.external.FileSplit;
-import org.apache.doris.spi.Split;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
@@ -66,6 +65,7 @@ import com.google.common.collect.RangeMap;
 import com.google.common.collect.Streams;
 import com.google.common.collect.TreeRangeMap;
 import lombok.Data;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
@@ -957,7 +957,7 @@ public class HiveMetaStoreCache {
         // File Cache for self splitter.
         private final List<HiveFileStatus> files = Lists.newArrayList();
         // File split cache for old splitter. This is a temp variable.
-        private final List<Split> splits = Lists.newArrayList();
+        private final List<FileSplit> splits = Lists.newArrayList();
         private boolean isSplittable;
         // The values of partitions.
         // e.g for file : hdfs://path/to/table/part1=a/part2=b/datafile
@@ -967,17 +967,21 @@ public class HiveMetaStoreCache {
         private AcidInfo acidInfo;
 
         public void addFile(RemoteFile file) {
-            HiveFileStatus status = new HiveFileStatus();
-            status.setBlockLocations(file.getBlockLocations());
-            status.setPath(file.getPath());
-            status.length = file.getSize();
-            status.blockSize = file.getBlockSize();
-            status.modificationTime = file.getModificationTime();
-            files.add(status);
+            if (isFileVisible(file.getName())) {
+                HiveFileStatus status = new HiveFileStatus();
+                status.setBlockLocations(file.getBlockLocations());
+                status.setPath(file.getPath());
+                status.length = file.getSize();
+                status.blockSize = file.getBlockSize();
+                status.modificationTime = file.getModificationTime();
+                files.add(status);
+            }
         }
 
-        public void addSplit(Split split) {
-            splits.add(split);
+        public void addSplit(FileSplit split) {
+            if (isFileVisible(split.getPath().getName())) {
+                splits.add(split);
+            }
         }
 
         public int getValuesSize() {
@@ -992,6 +996,12 @@ public class HiveMetaStoreCache {
         public void setAcidInfo(AcidInfo acidInfo) {
             this.acidInfo = acidInfo;
         }
+
+        private boolean isFileVisible(String filename) {
+            return StringUtils.isNotEmpty(filename)
+                        && !filename.startsWith(".")
+                        && !filename.startsWith("_");
+        }
     }
 
     @Data


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to