RussellSpitzer commented on code in PR #9546: URL: https://github.com/apache/iceberg/pull/9546#discussion_r1493072836
########## core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java: ########## @@ -355,71 +475,90 @@ int findVersion() { * an attempt will be made to delete the source file. * * @param fs the filesystem used for the rename - * @param src the source file - * @param dst the destination file + * @param tempMetaDataFile the source file + * @param finalMetaDataFile the destination file */ - private void renameToFinal(FileSystem fs, Path src, Path dst, int nextVersion) { + @VisibleForTesting + boolean commitNewVersion( + FileSystem fs, Path tempMetaDataFile, Path finalMetaDataFile, Integer nextVersion) + throws IOException { try { - if (!lockManager.acquire(dst.toString(), src.toString())) { + if (!lockManager.acquire(finalMetaDataFile.toString(), finalMetaDataFile.toString())) { throw new CommitFailedException( - "Failed to acquire lock on file: %s with owner: %s", dst, src); + "Failed to acquire lock on file: %s with owner: %s", + finalMetaDataFile, tempMetaDataFile); } - if (fs.exists(dst)) { - throw new CommitFailedException("Version %d already exists: %s", nextVersion, dst); - } - - if (!fs.rename(src, dst)) { - CommitFailedException cfe = - new CommitFailedException("Failed to commit changes using rename: %s", dst); - RuntimeException re = tryDelete(src); - if (re != null) { - cfe.addSuppressed(re); - } - throw cfe; + if (fs.exists(finalMetaDataFile)) { + throw new CommitFailedException( + "Version %d already exists: %s", nextVersion, finalMetaDataFile); } - } catch (IOException e) { - CommitFailedException cfe = - new CommitFailedException(e, "Failed to commit changes using rename: %s", dst); - RuntimeException re = tryDelete(src); - if (re != null) { - cfe.addSuppressed(re); + // maybe too heavy.....? + if (!nextVersionIsLatest(nextVersion, fs)) { + // In the case of concurrent execution, + // verify that the version that is ready to be committed at a time is the latest version. + throw new CommitFailedException("Version %d too old: %s", nextVersion, finalMetaDataFile); } - throw cfe; + return renameMetaDataFileAndCheck(fs, tempMetaDataFile, finalMetaDataFile); } finally { - if (!lockManager.release(dst.toString(), src.toString())) { - LOG.warn("Failed to release lock on file: {} with owner: {}", dst, src); + if (!lockManager.release(finalMetaDataFile.toString(), finalMetaDataFile.toString())) { + LOG.warn( + "Failed to release lock on file: {} with owner: {}", + finalMetaDataFile, + tempMetaDataFile); } } } - /** - * Deletes the file from the file system. Any RuntimeException will be caught and returned. - * - * @param path the file to be deleted. - * @return RuntimeException caught, if any. null otherwise. - */ - private RuntimeException tryDelete(Path path) { - try { - io().deleteFile(path.toString()); - return null; - } catch (RuntimeException re) { - return re; - } + private void cleanUncommittedMeta(Path src) { + io().deleteFile(src.toString()); } protected FileSystem getFileSystem(Path path, Configuration hadoopConf) { return Util.getFs(path, hadoopConf); } + @VisibleForTesting + boolean renameMetaDataFileAndCheck(FileSystem fs, Path tempMetaDataFile, Path finalMetaDataFile) { + try { + // The most important step. There must be no mistakes in this step. + // Even if it does, we should stop everything. + return renameMetaDataFile(fs, tempMetaDataFile, finalMetaDataFile); + } catch (Throwable e) { + LOG.error("There were some problems with submitting the new version.", e); + try { + if (newMetadataExists(fs, finalMetaDataFile) && !tempMetadataExists(fs, tempMetaDataFile)) { + return true; + } else { + throw new CommitFailedException(e); + } + } catch (CommitFailedException e1) { + throw e1; + } catch (Throwable e2) { + throw new CommitStateUnknownException(e2); + } + } + } + + @VisibleForTesting Review Comment: Why are there two methods here? They both do exactly the same thing and i'm not sure they are any more succint then just inlining fs.exists -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org