This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new a3edd41db8 [branch1.2](fix) avoiding using broker reader when file size can not be got (#19474)

a3edd41db8 is described below

commit a3edd41db886bb323555e8f92cabe5a030960fa4
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Fri May 12 15:27:56 2023 +0800

    [branch1.2](fix) avoiding using broker reader when file size can not be got (#19474)

    The broker reader needs the file size, but for some file types, such as Parquet,
    the file size cannot be obtained from InputFormat.getSplits(). In that case, throw
    an exception and recommend using the hdfs or s3 reader instead. Also make
    IcebergSplit report the correct file size.
---
 .../apache/doris/planner/external/HiveSplit.java   |  5 ++--
 .../doris/planner/external/QueryScanProvider.java  | 27 +++++++++++++---------
 .../external/iceberg/IcebergScanProvider.java      |  2 +-
 .../planner/external/iceberg/IcebergSplit.java     |  4 ++--
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
index 6c8f916a5e..76fc3b02fd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
@@ -23,10 +23,11 @@ import org.apache.hadoop.mapred.FileSplit;
 
 @Data
 public class HiveSplit extends FileSplit {
-    public HiveSplit() {}
+    private long fileSize;
 
-    public HiveSplit(Path file, long start, long length, String[] hosts) {
+    public HiveSplit(Path file, long start, long length, long fileSize, String[] hosts) {
         super(file, start, length, hosts);
+        this.fileSize = fileSize;
     }
 
     protected TableFormatType tableFormatType;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
index d894dd1fff..a208321cc2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
@@ -116,18 +116,13 @@ public abstract class QueryScanProvider implements FileScanProviderIf {
 
             List<String> partitionValuesFromPath = BrokerUtil.parseColumnsFromPath(fileSplit.getPath().toString(),
                     pathPartitionKeys, false);
-            TFileRangeDesc rangeDesc = createFileRangeDesc(fileSplit, partitionValuesFromPath, pathPartitionKeys);
+            TFileRangeDesc rangeDesc = createFileRangeDesc(fileSplit, partitionValuesFromPath, pathPartitionKeys,
+                    locationType);
             // external data lake table
             if (split instanceof IcebergSplit) {
                 IcebergScanProvider.setIcebergParams(rangeDesc, (IcebergSplit) split);
             }
 
-            // file size of orc files is not correct get by FileSplit.getLength(),
-            // broker reader needs correct file size
-            if (locationType == TFileType.FILE_BROKER && fileFormatType == TFileFormatType.FORMAT_ORC) {
-                rangeDesc.setFileSize(((OrcSplit) fileSplit).getFileLength());
-            }
-
             curLocations.getScanRange().getExtScanRange().getFileScanRange().addToRanges(rangeDesc);
             LOG.debug("assign to backend {} with table split: {} ({}, {}), location: {}",
                     curLocations.getLocations().get(0).getBackendId(), fileSplit.getPath(), fileSplit.getStart(),
@@ -187,14 +182,24 @@ public abstract class QueryScanProvider implements FileScanProviderIf {
     }
 
     private TFileRangeDesc createFileRangeDesc(FileSplit fileSplit, List<String> columnsFromPath,
-            List<String> columnsFromPathKeys)
+            List<String> columnsFromPathKeys, TFileType locationType)
             throws DdlException, MetaNotFoundException {
         TFileRangeDesc rangeDesc = new TFileRangeDesc();
         rangeDesc.setStartOffset(fileSplit.getStart());
         rangeDesc.setSize(fileSplit.getLength());
-        // fileSize only be used when format is orc or parquet and TFileType is broker
-        // When TFileType is other type, it is not necessary
-        rangeDesc.setFileSize(fileSplit.getLength());
+
+        // broker reader needs file size
+        if (locationType == TFileType.FILE_BROKER) {
+            if (fileSplit instanceof OrcSplit) {
+                rangeDesc.setFileSize(((OrcSplit) fileSplit).getFileLength());
+            } else if (fileSplit instanceof HiveSplit) {
+                rangeDesc.setFileSize(((HiveSplit) fileSplit).getFileSize());
+            } else {
+                throw new DdlException("File size can not be got, please do not use broker to read this file. "
+                        + "Try to use hdfs reader or s3 reader.");
+            }
+        }
+
         rangeDesc.setColumnsFromPath(columnsFromPath);
         rangeDesc.setColumnsFromPathKeys(columnsFromPathKeys);
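To make the intent of the new createFileRangeDesc() guard easier to see, here is a
minimal, self-contained sketch of the same pattern. The names below
(BrokerSizeGuardSketch, ReaderType, Split, SizedSplit, requiredFileSize) are
hypothetical stand-ins for illustration only, not the actual Doris classes shown in
the diff: a broker read is only allowed when the split can report the full size of
its backing file, otherwise the request is rejected with a hint to use the hdfs or
s3 reader.

    // Illustrative only: hypothetical stand-ins for the real Doris types.
    public class BrokerSizeGuardSketch {

        enum ReaderType { BROKER, HDFS, S3 }

        // A scan split; may or may not know the total size of its backing file.
        interface Split {
            long start();
            long length();
        }

        // A split that can report the full file size, as HiveSplit/IcebergSplit now can.
        interface SizedSplit extends Split {
            long fileSize();
        }

        // The broker reader must be told the full file size up front; the hdfs/s3
        // readers can work from the split range alone.
        static long requiredFileSize(ReaderType reader, Split split) {
            if (reader != ReaderType.BROKER) {
                return split.length(); // range size is enough for non-broker readers
            }
            if (split instanceof SizedSplit) {
                return ((SizedSplit) split).fileSize();
            }
            throw new IllegalStateException("File size is unknown for this split, so the broker "
                    + "reader cannot be used. Read the file with the hdfs or s3 reader instead.");
        }

        public static void main(String[] args) {
            SizedSplit sized = new SizedSplit() {
                public long start() { return 0; }
                public long length() { return 64L << 20; }
                public long fileSize() { return 256L << 20; }
            };
            Split sizeless = new Split() {
                public long start() { return 0; }
                public long length() { return 64L << 20; }
            };

            System.out.println(requiredFileSize(ReaderType.BROKER, sized));   // 268435456
            System.out.println(requiredFileSize(ReaderType.HDFS, sizeless));  // 67108864
            try {
                requiredFileSize(ReaderType.BROKER, sizeless);                // rejected
            } catch (IllegalStateException e) {
                System.out.println("rejected: " + e.getMessage());
            }
        }
    }

Attaching the size to the split itself (as the fileSize field added to HiveSplit does
in this commit) keeps the check local to split construction instead of special-casing
each file format in the scan provider.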
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java
index 74d6c8c100..970dc97800 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergScanProvider.java
@@ -175,7 +175,7 @@ public class IcebergScanProvider extends QueryScanProvider {
         for (FileScanTask splitTask : task.split(128 * 1024 * 1024)) {
             String dataFilePath = splitTask.file().path().toString();
             IcebergSplit split = new IcebergSplit(new Path(dataFilePath), splitTask.start(),
-                    splitTask.length(), new String[0]);
+                    splitTask.length(), task.file().fileSizeInBytes(), new String[0]);
             split.setFormatVersion(formatVersion);
             if (formatVersion >= MIN_DELETE_FILE_SUPPORT_VERSION) {
                 split.setDeleteFileFilters(getDeleteFileFilters(splitTask));
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
index a82c99b04a..a7cd320773 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
@@ -27,8 +27,8 @@ import java.util.List;
 
 @Data
 public class IcebergSplit extends HiveSplit {
-    public IcebergSplit(Path file, long start, long length, String[] hosts) {
-        super(file, start, length, hosts);
+    public IcebergSplit(Path file, long start, long length, long fileSize, String[] hosts) {
+        super(file, start, length, fileSize, hosts);
     }
 
     private Analyzer analyzer;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org