bgaborg commented on a change in pull request #951: HADOOP-15183. S3Guard store
becomes inconsistent after partial failure of rename
URL: https://github.com/apache/hadoop/pull/951#discussion_r292991041
##########
File path:
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
##########
@@ -748,24 +942,45 @@ public void move(Collection<Path> pathsToDelete,
// Following code is to maintain this invariant by putting all ancestor
// directories of the paths to create.
// ancestor paths that are not explicitly added to paths to create
- Collection<DDBPathMetadata> newItems = new ArrayList<>();
+ AncestorState ancestorState = extractOrCreate(operationState,
+ BulkOperationState.OperationType.Rename);
+ List<DDBPathMetadata> newItems = new ArrayList<>();
if (pathsToCreate != null) {
- newItems.addAll(completeAncestry(pathMetaToDDBPathMeta(pathsToCreate)));
+ // create all parent entries.
+ // this is synchronized on the move state so that across both serialized
+ // and parallelized renames, duplicate ancestor entries are not created.
+ synchronized (ancestorState) {
+ newItems.addAll(
+ completeAncestry(
+ pathMetaToDDBPathMeta(pathsToCreate),
+ ancestorState));
+ }
}
+ // sort all the new items topmost first.
+ newItems.sort(PathOrderComparators.TOPMOST_PM_FIRST);
+
+ // now process the deletions.
if (pathsToDelete != null) {
+ List<DDBPathMetadata> tombstones = new ArrayList<>(pathsToDelete.size());
for (Path meta : pathsToDelete) {
- newItems.add(new DDBPathMetadata(PathMetadata.tombstone(meta)));
+ tombstones.add(new DDBPathMetadata(PathMetadata.tombstone(meta)));
}
+ // sort all the tombstones lowest first.
+ tombstones.sort(PathOrderComparators.TOPMOST_PM_LAST);
+ newItems.addAll(tombstones);
}
processBatchWriteRequest(null, pathMetadataToItem(newItems));
}
/**
* Helper method to issue a batch write request to DynamoDB.
- *
+ * <ol>
+ * <li>Keys to delete are processed ahead of writing new items.</li>
+ * <li>No attempt is made to sort the input: the caller must do that</li>
+ * </ol>
* As well as retrying on the operation invocation, incomplete
- * batches are retried until all have been deleted.
+ * batches are retried until all have been processed..
Review comment:
nit: double dots
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]