This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new a3edd41db8 [branch1.2](fix) avoiding using broker reader when file 
size can not be got (#19474)
a3edd41db8 is described below

commit a3edd41db886bb323555e8f92cabe5a030960fa4
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Fri May 12 15:27:56 2023 +0800

    [branch1.2](fix) avoiding using broker reader when file size can not be got 
(#19474)
    
    The broker reader needs the file size info.
    But for some file types, such as parquet, we cannot get the file size from 
InputFormat.getSplits().
    So we throw an exception in this case and recommend using the hdfs or s3 reader instead.
    
    Also, for IcebergSplit, obtain the correct file size.
---
 .../apache/doris/planner/external/HiveSplit.java   |  5 ++--
 .../doris/planner/external/QueryScanProvider.java  | 27 +++++++++++++---------
 .../external/iceberg/IcebergScanProvider.java      |  2 +-
 .../planner/external/iceberg/IcebergSplit.java     |  4 ++--
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
index 6c8f916a5e..76fc3b02fd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
@@ -23,10 +23,11 @@ import org.apache.hadoop.mapred.FileSplit;
 
 @Data
 public class HiveSplit extends FileSplit {
-    public HiveSplit() {}
+    private long fileSize;
 
-    public HiveSplit(Path file, long start, long length, String[] hosts) {
+    public HiveSplit(Path file, long start, long length, long fileSize, 
String[] hosts) {
         super(file, start, length, hosts);
+        this.fileSize = fileSize;
     }
 
     protected TableFormatType tableFormatType;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
index d894dd1fff..a208321cc2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
@@ -116,18 +116,13 @@ public abstract class QueryScanProvider implements 
FileScanProviderIf {
                 List<String> partitionValuesFromPath = 
BrokerUtil.parseColumnsFromPath(fileSplit.getPath().toString(),
                         pathPartitionKeys, false);
 
-                TFileRangeDesc rangeDesc = createFileRangeDesc(fileSplit, 
partitionValuesFromPath, pathPartitionKeys);
+                TFileRangeDesc rangeDesc = createFileRangeDesc(fileSplit, 
partitionValuesFromPath, pathPartitionKeys,
+                        locationType);
                 // external data lake table
                 if (split instanceof IcebergSplit) {
                     IcebergScanProvider.setIcebergParams(rangeDesc, 
(IcebergSplit) split);
                 }
 
-                // file size of orc files is not correct get by 
FileSplit.getLength(),
-                // broker reader needs correct file size
-                if (locationType == TFileType.FILE_BROKER && fileFormatType == 
TFileFormatType.FORMAT_ORC) {
-                    rangeDesc.setFileSize(((OrcSplit) 
fileSplit).getFileLength());
-                }
-
                 
curLocations.getScanRange().getExtScanRange().getFileScanRange().addToRanges(rangeDesc);
                 LOG.debug("assign to backend {} with table split: {} ({}, {}), 
location: {}",
                         curLocations.getLocations().get(0).getBackendId(), 
fileSplit.getPath(), fileSplit.getStart(),
@@ -187,14 +182,24 @@ public abstract class QueryScanProvider implements 
FileScanProviderIf {
     }
 
     private TFileRangeDesc createFileRangeDesc(FileSplit fileSplit, 
List<String> columnsFromPath,
-            List<String> columnsFromPathKeys)
+            List<String> columnsFromPathKeys, TFileType locationType)
             throws DdlException, MetaNotFoundException {
         TFileRangeDesc rangeDesc = new TFileRangeDesc();
         rangeDesc.setStartOffset(fileSplit.getStart());
         rangeDesc.setSize(fileSplit.getLength());
-        // fileSize only be used when format is orc or parquet and TFileType 
is broker
-        // When TFileType is other type, it is not necessary
-        rangeDesc.setFileSize(fileSplit.getLength());
+
+        // broker reader needs file size
+        if (locationType == TFileType.FILE_BROKER) {
+            if (fileSplit instanceof OrcSplit) {
+                rangeDesc.setFileSize(((OrcSplit) fileSplit).getFileLength());
+            } else if (fileSplit instanceof HiveSplit) {
+                rangeDesc.setFileSize(((HiveSplit) fileSplit).getFileSize());
+            } else {
+                throw new DdlException("File size can not be got, please do 
not use broker to read this file. "
+                        + "Try to use hdfs reader or s3 reader.");
+            }
+        }
+
         rangeDesc.setColumnsFromPath(columnsFromPath);
         rangeDesc.setColumnsFromPathKeys(columnsFromPathKeys);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java
index 74d6c8c100..970dc97800 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java
@@ -175,7 +175,7 @@ public class IcebergScanProvider extends QueryScanProvider {
             for (FileScanTask splitTask : task.split(128 * 1024 * 1024)) {
                 String dataFilePath = splitTask.file().path().toString();
                 IcebergSplit split = new IcebergSplit(new Path(dataFilePath), 
splitTask.start(),
-                        splitTask.length(), new String[0]);
+                        splitTask.length(), task.file().fileSizeInBytes(), new 
String[0]);
                 split.setFormatVersion(formatVersion);
                 if (formatVersion >= MIN_DELETE_FILE_SUPPORT_VERSION) {
                     
split.setDeleteFileFilters(getDeleteFileFilters(splitTask));
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
index a82c99b04a..a7cd320773 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
@@ -27,8 +27,8 @@ import java.util.List;
 
 @Data
 public class IcebergSplit extends HiveSplit {
-    public IcebergSplit(Path file, long start, long length, String[] hosts) {
-        super(file, start, length, hosts);
+    public IcebergSplit(Path file, long start, long length, long fileSize, 
String[] hosts) {
+        super(file, start, length, fileSize, hosts);
     }
 
     private Analyzer analyzer;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to