This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 735114a2df [branch1.2](catalog) skip empty file (#23168)
735114a2df is described below

commit 735114a2dfa0195c39393dfc144fd5648dfb6d5f
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Fri Aug 18 21:18:44 2023 +0800

    [branch1.2](catalog) skip empty file (#23168)
    
    We should skip empty files when getting the file list for a Hive partition.
    Otherwise, an error may be returned when the BE reads an empty file.
    
    This change applies only to branch-1.2; the master branch has already fixed this issue.
---
 .../apache/doris/datasource/hive/HiveMetaStoreCache.java    | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 06a72c112b..44e6ec8eb9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -270,7 +270,6 @@ public class HiveMetaStoreCache {
             FileInputFormat.setInputPaths(jobConf, finalLocation);
             try {
                 InputFormat<?, ?> inputFormat = 
HiveUtil.getInputFormat(jobConf, key.inputFormat, false);
-                HiveSplit[] hiveSplits;
                 InputSplit[] splits;
                 String remoteUser = jobConf.get(HdfsResource.HADOOP_USER_NAME);
                 if (!Strings.isNullOrEmpty(remoteUser)) {
@@ -287,7 +286,7 @@ public class HiveMetaStoreCache {
                     LOG.warn("Splits for location {} is null", finalLocation);
                     return ImmutableList.copyOf(new HiveSplit[0]);
                 }
-                hiveSplits = new HiveSplit[splits.length];
+                List<HiveSplit> hiveSplits = Lists.newArrayList();
                 List<String> pValues;
                 // handle default hive partition case, replace the default 
partition value with null_string.
                 if (key.hasDefaultPartitionValue) {
@@ -303,9 +302,15 @@ public class HiveMetaStoreCache {
                     pValues = key.partitionValues;
                 }
                 for (int i = 0; i < splits.length; i++) {
+                    if (splits[i].getLength() == 0) {
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("get empty file {}, skip it", 
splits[i].toString());
+                        }
+                        continue;
+                    }
                     FileSplit fileSplit = (FileSplit) splits[i];
-                    hiveSplits[i] = new HiveSplit(fileSplit.getPath(), 
fileSplit.getStart(), fileSplit.getLength(),
-                        fileSplit.getLength(), null, pValues);
+                    hiveSplits.add(new HiveSplit(fileSplit.getPath(), 
fileSplit.getStart(), fileSplit.getLength(),
+                            fileSplit.getLength(), null, pValues));
                 }
                 return ImmutableList.copyOf(hiveSplits);
             } catch (Exception e) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to