noslowerdna commented on a change in pull request #654: HADOOP-15183 S3Guard
store becomes inconsistent after partial failure of rename
URL: https://github.com/apache/hadoop/pull/654#discussion_r281802726
##########
File path:
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
##########
@@ -1199,115 +1229,248 @@ private boolean innerRename(Path source, Path dest)
}
}
- // If we have a MetadataStore, track deletions/creations.
- Collection<Path> srcPaths = null;
- List<PathMetadata> dstMetas = null;
- if (hasMetadataStore()) {
- srcPaths = new HashSet<>(); // srcPaths need fast look up before put
- dstMetas = new ArrayList<>();
- }
- // TODO S3Guard HADOOP-13761: retries when source paths are not visible yet
- // TODO S3Guard: performance: mark destination dirs as authoritative
-
- // Ok! Time to start
- if (srcStatus.isFile()) {
- LOG.debug("rename: renaming file {} to {}", src, dst);
- long length = srcStatus.getLen();
- if (dstStatus != null && dstStatus.isDirectory()) {
- String newDstKey = maybeAddTrailingSlash(dstKey);
- String filename =
- srcKey.substring(pathToKey(src.getParent()).length()+1);
- newDstKey = newDstKey + filename;
- copyFile(srcKey, newDstKey, length);
- S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src,
- keyToQualifiedPath(newDstKey), length, getDefaultBlockSize(dst),
- username);
- } else {
- copyFile(srcKey, dstKey, srcStatus.getLen());
- S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, dst,
- length, getDefaultBlockSize(dst), username);
- }
- innerDelete(srcStatus, false);
- } else {
- LOG.debug("rename: renaming directory {} to {}", src, dst);
+ // Validation completed: time to begin the operation.
+ // The store-specific rename operation is used to keep the store
+ // to date with the in-progress operation.
+ // for the null store, these are all no-ops.
+ final RenameTracker renameTracker =
+ metadataStore.initiateRenameOperation(
+ createStoreContext(),
+ src, srcStatus, dest);
+ final AtomicLong bytesCopied = new AtomicLong();
+ int renameParallelLimit = 10;
Review comment:
Maybe add a comment about how you determined 10 to be optimal, and why it
doesn't need to be configurable
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]