nastra commented on code in PR #12120: URL: https://github.com/apache/iceberg/pull/12120#discussion_r1952399081
########## spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java: ########## @@ -377,7 +377,8 @@ private Builder doExecuteWithPartialProgress( // stop commit service commitService.close(); - int failedCommits = maxCommits - commitService.succeededCommits(); + int totalCommits = groupsPerCommit == 1 ? ctx.totalGroupCount() : maxCommits; + int failedCommits = totalCommits - commitService.succeededCommits(); Review Comment: I feel like this calculation is weird and doesn't look at actually failed commits. Why not collect the failed commits within the commit service and use it here? Something like ``` --- a/core/src/main/java/org/apache/iceberg/actions/BaseCommitService.java +++ b/core/src/main/java/org/apache/iceberg/actions/BaseCommitService.java @@ -61,6 +61,7 @@ abstract class BaseCommitService<T> implements Closeable { private final AtomicBoolean running = new AtomicBoolean(false); private final long timeoutInMS; private int succeededCommits = 0; + private int failedCommits = 0; /** * Constructs a {@link BaseCommitService} @@ -231,6 +232,7 @@ abstract class BaseCommitService<T> implements Closeable { succeededCommits++; } catch (Exception e) { LOG.error("Failure during rewrite commit process, partial progress enabled. Ignoring", e); + failedCommits++; } inProgressCommits.remove(inProgressCommitToken); } @@ -240,6 +242,10 @@ abstract class BaseCommitService<T> implements Closeable { return succeededCommits; } + public int failedCommits() { + return failedCommits; + } + @VisibleForTesting boolean canCreateCommitGroup() { // Either we have a full commit group, or we have completed writing and need to commit diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index e04a0c88b4..7c851c3dc9 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -377,7 +377,7 @@ public class RewriteDataFilesSparkAction // stop commit service commitService.close(); - int failedCommits = maxCommits - commitService.succeededCommits(); + int failedCommits = commitService.failedCommits(); if (failedCommits > 0 && failedCommits <= maxFailedCommits) { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org