BsoBird commented on code in PR #9546: URL: https://github.com/apache/iceberg/pull/9546#discussion_r1650741211
########## core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java: ########## @@ -370,58 +402,70 @@ private void renameToFinal(FileSystem fs, Path src, Path dst, int nextVersion) { if (fs.exists(dst)) { throw new CommitFailedException("Version %d already exists: %s", nextVersion, dst); } - - if (!fs.rename(src, dst)) { - CommitFailedException cfe = - new CommitFailedException("Failed to commit changes using rename: %s", dst); - RuntimeException re = tryDelete(src); - if (re != null) { - cfe.addSuppressed(re); - } - throw cfe; - } - } catch (IOException e) { - CommitFailedException cfe = - new CommitFailedException(e, "Failed to commit changes using rename: %s", dst); - RuntimeException re = tryDelete(src); - if (re != null) { - cfe.addSuppressed(re); + if (!nextVersionIsLatest(nextVersion, fs)) { + throw new CommitFailedException( + "Cannot commit version [%d] because it is smaller or much larger than the current latest version [%d].Are there other clients running in parallel with the current task?", + nextVersion, findVersionWithOutVersionHint(fs)); } - throw cfe; + return renameMetaDataFileAndCheck(fs, src, dst); } finally { if (!lockManager.release(dst.toString(), src.toString())) { LOG.warn("Failed to release lock on file: {} with owner: {}", dst, src); } } } - /** - * Deletes the file from the file system. Any RuntimeException will be caught and returned. - * - * @param path the file to be deleted. - * @return RuntimeException caught, if any. null otherwise. - */ - private RuntimeException tryDelete(Path path) { + protected FileSystem getFileSystem(Path path, Configuration hadoopConf) { + return Util.getFs(path, hadoopConf); + } + + @VisibleForTesting + boolean checkMetaDataFileRenameSuccess( + FileSystem fs, Path tempMetaDataFile, Path finalMetaDataFile) throws IOException { + return fs.exists(finalMetaDataFile) && !fs.exists(tempMetaDataFile); + } + + @VisibleForTesting + boolean renameMetaDataFile(FileSystem fs, Path tempMetaDataFile, Path finalMetaDataFile) + throws IOException { + return fs.rename(tempMetaDataFile, finalMetaDataFile); + } + + private boolean renameCheck( + FileSystem fs, Path tempMetaDataFile, Path finalMetaDataFile, Throwable rootError) { try { - io().deleteFile(path.toString()); - return null; - } catch (RuntimeException re) { - return re; + return checkMetaDataFileRenameSuccess(fs, tempMetaDataFile, finalMetaDataFile); + } catch (Throwable e) { + LOG.error( + "No correctness check can be performed.src=[{}],dst=[{}].", + tempMetaDataFile, + finalMetaDataFile, + e); + String msg = + String.format( + "Exception thrown when renaming [%s] to [%s].Also no correctness check can be performed.", + tempMetaDataFile, finalMetaDataFile); + throw new CommitStateUnknownException(msg, rootError != null ? rootError : e); } } - protected FileSystem getFileSystem(Path path, Configuration hadoopConf) { - return Util.getFs(path, hadoopConf); + @VisibleForTesting + boolean renameMetaDataFileAndCheck(FileSystem fs, Path tempMetaDataFile, Path finalMetaDataFile) { + try { + return renameMetaDataFile(fs, tempMetaDataFile, finalMetaDataFile); + } catch (Throwable e) { Review Comment: > I think it is best to be more specific here: @Fokko This is a crucial step. I think we should enlarge the scope of the exception detection. This is because at this point we may encounter OOM exceptions and so on. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org