morningman commented on code in PR #29339: URL: https://github.com/apache/doris/pull/29339#discussion_r1438626199
########## be/src/vec/exec/scan/vfile_scanner.cpp: ########## @@ -736,11 +736,19 @@ Status VFileScanner::_get_next_reader() { // JNI reader can only push down column value range bool push_down_predicates = !_is_load && _params->format_type != TFileFormatType::FORMAT_JNI; - if (format_type == TFileFormatType::FORMAT_JNI && range.__isset.table_format_params && - range.table_format_params.table_format_type == "hudi") { - if (range.table_format_params.hudi_params.delta_logs.empty()) { + if (format_type == TFileFormatType::FORMAT_JNI && range.__isset.table_format_params) { + if (range.table_format_params.table_format_type == "hudi" && + range.table_format_params.hudi_params.delta_logs.empty()) { // fall back to native reader if there is no log file format_type = TFileFormatType::FORMAT_PARQUET; + } else if (range.table_format_params.table_format_type == "paimon" && + !range.table_format_params.paimon_params.__isset.paimon_split) { + // use native reader + if (range.table_format_params.paimon_params.file_format == "orc") { + format_type = TFileFormatType::FORMAT_ORC; + } else if (range.table_format_params.paimon_params.file_format == "parquet") { + format_type = TFileFormatType::FORMAT_PARQUET; + } Review Comment: Add an `else` branch to return an `unsupported format` error. ########## fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java: ########## @@ -129,7 +138,28 @@ public List<Split> getSplits() throws UserException { .newScan().plan().splits(); for (org.apache.paimon.table.source.Split split : paimonSplits) { PaimonSplit paimonSplit = new PaimonSplit(split); - splits.add(paimonSplit); + if (split instanceof DataSplit) { + DataSplit dataSplit = (DataSplit) split; + Optional<List<RawFile>> optRowFiles = dataSplit.convertToRawFiles(); + if (optRowFiles.isPresent()) { + List<RawFile> rawFiles = optRowFiles.get(); + rawFiles.forEach(file -> { Review Comment: We need to take care of file splitting, in case there is a large file to be read in one thread. 
You can see the `splitFile()` method in `FileScanNode`. For example, in the hive scan node, we get the original file splits first, then split these files again by size (default is 128MB) ########## fe/fe-core/src/main/java/org/apache/doris/planner/external/paimon/PaimonScanNode.java: ########## @@ -102,7 +107,11 @@ public void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) { TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); tableFormatFileDesc.setTableFormatType(paimonSplit.getTableFormatType().value()); TPaimonFileDesc fileDesc = new TPaimonFileDesc(); - fileDesc.setPaimonSplit(encodeObjectToString(paimonSplit.getSplit())); + org.apache.paimon.table.source.Split split = paimonSplit.getSplit(); Review Comment: In which case can the split be null? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org