steveloughran commented on code in PR #10233:
URL: https://github.com/apache/iceberg/pull/10233#discussion_r1958562315
##########
core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java:
##########
@@ -173,8 +203,76 @@ public void deletePrefix(String prefix) {
    }
  }

  /**
   * Initialize the wrapped IO class if configured to do so.
   *
   * @return true if bulk delete should be used.
   */
  private synchronized boolean maybeUseBulkDeleteApi() {
    if (!bulkDeleteConfigured.compareAndSet(false, true)) {
      // configured already, so return.
      return useBulkDelete;
    }
    boolean enableBulkDelete = conf().getBoolean(BULK_DELETE_ENABLED, BULK_DELETE_ENABLED_DEFAULT);
    if (!enableBulkDelete) {
      LOG.debug("Bulk delete is disabled");
      useBulkDelete = false;
    } else {
      // library is configured to use bulk delete, so try to load it
      // and probe for the bulk delete methods being found.
      // this is only satisfied on Hadoop releases with the WrappedIO class.
      wrappedIO = new DynamicWrappedIO(getClass().getClassLoader());
      useBulkDelete = wrappedIO.bulkDeleteAvailable();
      if (useBulkDelete) {
        LOG.debug("Bulk delete is enabled and available");
      } else {
        LOG.debug("Bulk delete enabled but not available");
      }
    }
    return useBulkDelete;
  }

  /**
   * Will this instance use bulk delete operations? Exported for testing in hadoop-aws. Calling
   * this method will trigger a load of the WrappedIO class if required.
   *
   * @return true if bulk delete is enabled and available.
   */
  public boolean isBulkDeleteApiUsed() {
    return maybeUseBulkDeleteApi();
  }

  /**
   * Delete files.
   *
   * <p>If the Hadoop bulk deletion API is available and enabled, this API is used through {@link
   * #bulkDeleteFiles(Iterable)}. Otherwise, each file is deleted individually in the thread pool.
   *
   * @param pathsToDelete The paths to delete
   * @throws BulkDeletionFailureException failure to delete one or more files.
   */
  @Override
  public void deleteFiles(Iterable<String> pathsToDelete) throws BulkDeletionFailureException {
    if (maybeUseBulkDeleteApi()) {
      // bulk delete.
      try {
        final int count = bulkDeleteFiles(pathsToDelete);
        if (count != 0) {
          throw new BulkDeletionFailureException(count);
        }
        // deletion worked.
        return;
      } catch (UnsupportedOperationException e) {
        // Something went very wrong with reflection here.
        // Probably a mismatch between the hadoop FS APIs and the implementation
        // class, either due to mocking or library versions.

        // Log and fall back to the classic delete
        LOG.debug("Failed to use bulk delete - falling back", e);

Review Comment:
   maybe we should stop trying again?
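A minimal sketch of the "stop trying again" idea raised in the review: once the reflective bulk-delete path throws UnsupportedOperationException, clear a flag so later deleteFiles() calls go straight to the per-file fallback instead of retrying the broken path. The class and method names below (BulkDeleteFallback, bulkDelete, deleteIndividually) are hypothetical illustrations, not the Iceberg patch itself.

import java.util.concurrent.atomic.AtomicBoolean;

// Hypothetical sketch: disable the reflective bulk-delete path after its
// first UnsupportedOperationException so the failure cost is paid at most once.
class BulkDeleteFallback {

  // true until the reflective path fails; then permanently false
  private final AtomicBoolean bulkDeleteUsable = new AtomicBoolean(true);

  void deleteFiles(Iterable<String> paths) {
    if (bulkDeleteUsable.get()) {
      try {
        bulkDelete(paths);
        return;
      } catch (UnsupportedOperationException e) {
        // reflection/API mismatch: never attempt the bulk path again
        bulkDeleteUsable.set(false);
      }
    }
    deleteIndividually(paths);
  }

  // placeholder for the reflective WrappedIO bulk delete
  void bulkDelete(Iterable<String> paths) {
    throw new UnsupportedOperationException("bulk delete unavailable");
  }

  // placeholder for the classic per-file delete loop
  void deleteIndividually(Iterable<String> paths) {
    for (String path : paths) {
      // delete each path individually here
    }
  }
}

Since maybeUseBulkDeleteApi() in the patch already caches its decision in the useBulkDelete field, the same effect could presumably be had by assigning useBulkDelete = false inside the catch block shown in the diff.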