steveloughran commented on code in PR #10233:
URL: https://github.com/apache/iceberg/pull/10233#discussion_r1958562315


##########
core/src/main/java/org/apache/iceberg/hadoop/HadoopFileIO.java:
##########
@@ -173,8 +203,76 @@ public void deletePrefix(String prefix) {
     }
   }
 
+  /**
+   * Initialize the wrapped IO class if configured to do so.
+   *
+   * @return true if bulk delete should be used.
+   */
+  private synchronized boolean maybeUseBulkDeleteApi() {
+    if (!bulkDeleteConfigured.compareAndSet(false, true)) {
+      // configured already, so return.
+      return useBulkDelete;
+    }
+    boolean enableBulkDelete = conf().getBoolean(BULK_DELETE_ENABLED, 
BULK_DELETE_ENABLED_DEFAULT);
+    if (!enableBulkDelete) {
+      LOG.debug("Bulk delete is disabled");
+      useBulkDelete = false;
+    } else {
+      // library is configured to use bulk delete, so try to load it
+      // and probe for the bulk delete methods being found.
+      // this is only satisfied on Hadoop releases with the WrappedIO class.
+      wrappedIO = new DynamicWrappedIO(getClass().getClassLoader());
+      useBulkDelete = wrappedIO.bulkDeleteAvailable();
+      if (useBulkDelete) {
+        LOG.debug("Bulk delete is enabled and available");
+      } else {
+        LOG.debug("Bulk delete enabled but not available");
+      }
+    }
+    return useBulkDelete;
+  }
+
+  /**
+   * Will this instance use bulk delete operations? Exported for testing in 
hadoop-aws. Calling this
+   * method will trigger a load of the WrappedIO class if required.
+   *
+   * @return true if bulk delete is enabled and available.
+   */
+  public boolean isBulkDeleteApiUsed() {
+    return maybeUseBulkDeleteApi();
+  }
+
+  /**
+   * Delete files.
+   *
+   * <p>If the Hadoop bulk deletion API is available and enabled, this API is 
used through {@link
+   * #bulkDeleteFiles(Iterable)}. Otherwise, each file is deleted individually 
in the thread pool.
+   *
+   * @param pathsToDelete The paths to delete
+   * @throws BulkDeletionFailureException failure to delete one or more files.
+   */
   @Override
   public void deleteFiles(Iterable<String> pathsToDelete) throws 
BulkDeletionFailureException {
+    if (maybeUseBulkDeleteApi()) {
+      // bulk delete.
+      try {
+        final int count = bulkDeleteFiles(pathsToDelete);
+        if (count != 0) {
+          throw new BulkDeletionFailureException(count);
+        }
+        // deletion worked.
+        return;
+      } catch (UnsupportedOperationException e) {
+        // Something went very wrong with reflection here.
+        // Probably a mismatch between the hadoop FS APIs and the 
implementation
+        // class, either due to mocking or library versions.
+
+        // Log and fall back to the classic delete
+        LOG.debug("Failed to use bulk delete -falling back", e);

Review Comment:
   Maybe we should stop retrying here: once bulk delete has failed with `UnsupportedOperationException` (a reflection/version mismatch), it will fail the same way on every call — consider clearing `useBulkDelete` so subsequent `deleteFiles()` invocations go straight to the classic per-file delete path?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org

Reply via email to