[
https://issues.apache.org/jira/browse/HADOOP-18012?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17704019#comment-17704019
]
ASF GitHub Bot commented on HADOOP-18012:
-----------------------------------------
anmolanmol1234 commented on code in PR #5488:
URL: https://github.com/apache/hadoop/pull/5488#discussion_r1145921432
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java:
##########
@@ -556,99 +583,150 @@ public AbfsClientRenameResult renamePath(
// isMetadataIncompleteState is used for renameRecovery(as the 2nd
param).
return new AbfsClientRenameResult(op, isMetadataIncompleteState,
isMetadataIncompleteState);
} catch (AzureBlobFileSystemException e) {
- // If we have no HTTP response, throw the original exception.
- if (!op.hasResult()) {
- throw e;
- }
+ // If we have no HTTP response, throw the original exception.
+ if (!op.hasResult()) {
+ throw e;
+ }
- // ref: HADOOP-18242. Rename failure occurring due to a rare case of
- // tracking metadata being in incomplete state.
- if (op.getResult().getStorageErrorCode()
- .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
- && !isMetadataIncompleteState) {
- //Logging
- ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
- .info("Rename Failure attempting to resolve tracking metadata
state and retrying.");
-
- // Doing a HEAD call resolves the incomplete metadata state and
- // then we can retry the rename operation.
- AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
- tracingContext);
- isMetadataIncompleteState = true;
- // Extract the sourceEtag, using the status Op, and set it
- // for future rename recovery.
- AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
- String sourceEtagAfterFailure =
extractEtagHeader(sourceStatusResult);
- renamePath(source, destination, continuation, tracingContext,
- sourceEtagAfterFailure, isMetadataIncompleteState);
- }
- // if we get out of the condition without a successful rename, then
- // it isn't metadata incomplete state issue.
- isMetadataIncompleteState = false;
-
- boolean etagCheckSucceeded = renameIdempotencyCheckOp(
- source,
- sourceEtag, op, destination, tracingContext);
- if (!etagCheckSucceeded) {
- // idempotency did not return different result
- // throw back the exception
- throw e;
- }
+ // ref: HADOOP-18242. Rename failure occurring due to a rare case of
+ // tracking metadata being in incomplete state.
+ if (op.getResult().getStorageErrorCode()
+ .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode())
+ && !isMetadataIncompleteState) {
+ //Logging
+ ABFS_METADATA_INCOMPLETE_RENAME_FAILURE
+ .info("Rename Failure attempting to resolve tracking metadata
state and retrying.");
+
+ // Doing a HEAD call resolves the incomplete metadata state and
+ // then we can retry the rename operation.
+ AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
+ tracingContext);
+ isMetadataIncompleteState = true;
+ // Extract the sourceEtag, using the status Op, and set it
+ // for future rename recovery.
+ AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
+ String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult);
+ renamePath(source, destination, continuation, tracingContext,
+ sourceEtagAfterFailure, isMetadataIncompleteState,
isNamespaceEnabled);
+ }
+ // if we get out of the condition without a successful rename, then
+ // it isn't metadata incomplete state issue.
+ isMetadataIncompleteState = false;
+
+ boolean etagCheckSucceeded = renameIdempotencyCheckOp(
+ source,
+ sourceEtag, op, destination, tracingContext, isDir);
+ if (!etagCheckSucceeded) {
+ // idempotency did not return different result
+ // throw back the exception
+ throw e;
+ }
return new AbfsClientRenameResult(op, true, isMetadataIncompleteState);
}
}
+ private boolean checkIsDir(AbfsHttpOperation result) {
+ String resourceType = result.getResponseHeader(
+ HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
+ return resourceType != null
+ && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY);
+ }
+
+ @VisibleForTesting
+ AbfsRestOperation createRenameRestOperation(URL url, List<AbfsHttpHeader>
requestHeaders) {
+ AbfsRestOperation op = new AbfsRestOperation(
+ AbfsRestOperationType.RenamePath,
+ this,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders);
+ return op;
+ }
+
private void incrementAbfsRenamePath() {
abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1);
}
/**
* Check if the rename request failure is post a retry and if earlier rename
* request might have succeeded at back-end.
- *
+ * <p>
* If a source etag was passed in, and the error was 404, get the
* etag of any file at the destination.
* If it matches the source etag, then the rename is considered
* a success.
* Exceptions raised in the probe of the destination are swallowed,
* so that they do not interfere with the original rename failures.
- * @param source source path
- * @param op Rename request REST operation response with non-null HTTP
response
- * @param destination rename destination path
- * @param sourceEtag etag of source file. may be null or empty
+ *
+ * @param source source path
+ * @param op Rename request REST operation response with
non-null HTTP response
+ * @param destination rename destination path
+ * @param sourceEtag etag of source file. may be null or empty
* @param tracingContext Tracks identifiers for request header
* @return true if the file was successfully copied
*/
public boolean renameIdempotencyCheckOp(
- final String source,
- final String sourceEtag,
- final AbfsRestOperation op,
- final String destination,
- TracingContext tracingContext) {
+ final String source,
+ final String sourceEtag,
+ final AbfsRestOperation op,
+ final String destination,
+ TracingContext tracingContext,
+ final boolean isDir) {
Review Comment:
Please add a javadoc `@param` entry for the new `isDir` parameter.
> ABFS: Enable config controlled ETag check for Rename idempotency
> ----------------------------------------------------------------
>
> Key: HADOOP-18012
> URL: https://issues.apache.org/jira/browse/HADOOP-18012
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs/azure
> Affects Versions: 3.3.2
> Reporter: Sneha Vijayarajan
> Assignee: Sree Bhattacharyya
> Priority: Major
> Labels: pull-request-available
>
> The ABFS driver has handling for rename idempotency which relies on the LMT
> of the destination file to conclude whether the rename was successful when
> the source file is absent and the rename request had entered the retry loop.
> This handling is incorrect, as the LMT of the destination does not change on
> rename.
> This Jira will track the change to undo the current implementation and add a
> new one where, for an incoming rename operation, the source file eTag is
> fetched first and the rename is then done only if the eTag matches for the
> source file.
> As this is going to be a costly operation, given that an extra HEAD request
> is added to each rename, this implementation will be guarded by a config and
> can be enabled by customers who have workloads that do multiple renames.
> A long-term plan to handle rename idempotency without a HEAD request is being
> discussed.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]