morningman commented on code in PR #49489: URL: https://github.com/apache/doris/pull/49489#discussion_r2020628071
########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java: ########## @@ -345,23 +353,50 @@ public boolean isBatchMode() { return false; } } - // TODO Use a better judgment method to decide whether to use batch mode. - return sessionVariable.getNumPartitionsInBatchMode() > 1024; + + if (createTableScan().snapshot() == null) { + return false; + } + + if (!sessionVariable.getEnableExternalTableBatchMode()) { + return false; + } + + try (CloseableIterator<ManifestFile> matchingManifest = + IcebergUtils.getMatchingManifest( + createTableScan().snapshot().dataManifests(icebergTable.io()), + icebergTable.specs(), + createTableScan().filter()).iterator()) { + int cnt = 0; + while (matchingManifest.hasNext()) { + cnt += matchingManifest.next().addedFilesCount(); + if (cnt >= sessionVariable.getNumPartitionsInBatchMode()) { + return true; + } + } + } catch (Exception e) { + Optional<NotSupportedException> opt = checkNotSupportedException(e); + if (opt.isPresent()) { + throw opt.get(); + } else { + throw new RuntimeException(e); + } + } + return false; } - public Long getSpecifiedSnapshot() throws UserException { + public Long getSpecifiedSnapshot() { TableSnapshot tableSnapshot = getQueryTableSnapshot(); if (tableSnapshot != null) { TableSnapshot.VersionType type = tableSnapshot.getType(); - try { - if (type == TableSnapshot.VersionType.VERSION) { - return tableSnapshot.getVersion(); - } else { - long timestamp = TimeUtils.timeStringToLong(tableSnapshot.getTime(), TimeUtils.getTimeZone()); - return SnapshotUtil.snapshotIdAsOfTime(icebergTable, timestamp); + if (type == TableSnapshot.VersionType.VERSION) { Review Comment: Why not wrap with `try catch`? ########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergUtils.java: ########## @@ -709,4 +715,32 @@ public static HiveCatalog createIcebergHiveCatalog(ExternalCatalog externalCatal hiveCatalog.initialize(name, catalogProperties); return hiveCatalog; } + + public static CloseableIterable<ManifestFile> getMatchingManifest( Review Comment: Add comment for this method ########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java: ########## @@ -345,23 +353,50 @@ public boolean isBatchMode() { return false; } } - // TODO Use a better judgment method to decide whether to use batch mode. - return sessionVariable.getNumPartitionsInBatchMode() > 1024; + + if (createTableScan().snapshot() == null) { + return false; + } + + if (!sessionVariable.getEnableExternalTableBatchMode()) { + return false; + } + + try (CloseableIterator<ManifestFile> matchingManifest = + IcebergUtils.getMatchingManifest( + createTableScan().snapshot().dataManifests(icebergTable.io()), Review Comment: We may encounter authentication issue here? ########## fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java: ########## @@ -345,23 +353,50 @@ public boolean isBatchMode() { return false; } } - // TODO Use a better judgment method to decide whether to use batch mode. - return sessionVariable.getNumPartitionsInBatchMode() > 1024; + + if (createTableScan().snapshot() == null) { + return false; + } + + if (!sessionVariable.getEnableExternalTableBatchMode()) { + return false; + } + + try (CloseableIterator<ManifestFile> matchingManifest = + IcebergUtils.getMatchingManifest( + createTableScan().snapshot().dataManifests(icebergTable.io()), + icebergTable.specs(), + createTableScan().filter()).iterator()) { + int cnt = 0; + while (matchingManifest.hasNext()) { + cnt += matchingManifest.next().addedFilesCount(); + if (cnt >= sessionVariable.getNumPartitionsInBatchMode()) { Review Comment: It is strange to use file number to compare with partition number. ########## fe/fe-core/src/test/java/org/apache/doris/datasource/iceberg/IcebergUtilsTest.java: ########## @@ -60,4 +81,126 @@ private boolean getListAllTables(HiveCatalog hiveCatalog) throws IllegalAccessEx declaredField.setAccessible(true); return declaredField.getBoolean(hiveCatalog); } + + @Test + public void testGetMatchingManifest() { + + // partition : 100 - 200 + GenericManifestFile f1 = getGenericManifestFileForDataTypeWithPartitionSummary( + "manifest_f1.avro", Review Comment: are these `manifest_f1.avro` real files for test? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org