[
https://issues.apache.org/jira/browse/HADOOP-18012?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17702664#comment-17702664
]
ASF GitHub Bot commented on HADOOP-18012:
-----------------------------------------
sreeb-msft commented on code in PR #5488:
URL: https://github.com/apache/hadoop/pull/5488#discussion_r1142061991
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java:
##########
@@ -123,6 +138,112 @@ public void testRenameFailuresDueToIncompleteMetadata()
throws Exception {
}
+ AbfsClient getMockAbfsClient() throws IOException {
+ AzureBlobFileSystem fs = getFileSystem();
+
+ // specifying AbfsHttpOperation mock behavior
+
+ // mock object representing the 404 path not found result
+ AbfsHttpOperation mockHttp404Op = Mockito.mock(AbfsHttpOperation.class);
+ Mockito.doReturn(404).when(mockHttp404Op).getStatusCode();
+
Mockito.doNothing().when(mockHttp404Op).processResponse(nullable(byte[].class),
Mockito.any(int.class), Mockito.any(int.class));
+
Mockito.doNothing().when(mockHttp404Op).setRequestProperty(nullable(String.class),
nullable(String.class));
+
Mockito.doNothing().when(mockHttp404Op).sendRequest(nullable(byte[].class),
Mockito.any(int.class), Mockito.any(int.class));
+ Mockito.doReturn("PUT").when(mockHttp404Op).getMethod();
+ Mockito.doReturn("Source Path not
found").when(mockHttp404Op).getStorageErrorMessage();
+
Mockito.doReturn("SourcePathNotFound").when(mockHttp404Op).getStorageErrorCode();
+
+
+ // // mock object representing the 500 timeout result for first try of
rename
+ AbfsHttpOperation mockHttp500Op = Mockito.mock(AbfsHttpOperation.class);
+ Mockito.doReturn(500).when(mockHttp500Op).getStatusCode();
+ Mockito.doThrow(IOException.class)
+ .when(mockHttp500Op).processResponse(nullable(byte[].class),
Mockito.any(int.class), Mockito.any(int.class));
+
Mockito.doNothing().when(mockHttp500Op).setRequestProperty(nullable(String.class),
nullable(String.class));
+
Mockito.doNothing().when(mockHttp500Op).sendRequest(nullable(byte[].class),
Mockito.any(int.class), Mockito.any(int.class));
+ Mockito.doReturn("PUT").when(mockHttp500Op).getMethod();
+
Mockito.doReturn("ClientTimeoutError").when(mockHttp500Op).getStorageErrorCode();
+
+ // creating mock HttpUrlConnection object
+ HttpURLConnection mockUrlConn = Mockito.mock(HttpsURLConnection.class);
+
+ // tying all mocks together
+ Mockito.doReturn(mockUrlConn).when(mockHttp404Op).getConnection();
+ Mockito.doReturn(mockUrlConn).when(mockHttp500Op).getConnection();
+
+ // adding mock objects to current AbfsClient
+ AbfsClient spyClient = Mockito.spy(fs.getAbfsStore().getClient());
+ // Rest Operation is spied as it needs to have spyclient instance as a
param to the constructor
+ // directly returning a mock for this would make the client instance null
+ AbfsRestOperation mockRestOp = Mockito.spy(new AbfsRestOperation(
+ AbfsRestOperationType.RenamePath,
+ spyClient,
+ HTTP_METHOD_PUT,
+ null,
+ null)
+ );
+
Mockito.doReturn(mockRestOp).when(spyClient).createRenameRestOperation(nullable(URL.class),
nullable(List.class));
+
+
Mockito.doReturn(mockHttp500Op).doReturn(mockHttp404Op).when(mockRestOp).createHttpOperation();
+
Mockito.doReturn(mockHttp500Op).doReturn(mockHttp404Op).when(mockRestOp).getResult();
+
+ Mockito.doReturn(true).when(mockRestOp).hasResult();
+
+
+ SharedKeyCredentials mockSharedKeyCreds = mock(SharedKeyCredentials.class);
+
Mockito.doNothing().when(mockSharedKeyCreds).signRequest(Mockito.any(HttpURLConnection.class),
Mockito.any(long.class));
+ // real method calls made once at start and once at end
+ // for the two getPathStatus calls that actually have to be made
+
Mockito.doCallRealMethod().doReturn(mockSharedKeyCreds).doReturn(mockSharedKeyCreds).doCallRealMethod().when(spyClient).getSharedKeyCredentials();
+
+ return spyClient;
+
+ }
+
+ @Test
+ public void testRenameRecoverySrcDestEtagSame() throws IOException {
+ AzureBlobFileSystem fs = getFileSystem();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+
Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+ AbfsClient mockClient = getMockAbfsClient();
+
+
+ String path1 = "/dummyFile1";
+ String path2 = "/dummyFile2";
+
+ fs.create(new Path(path1));
+ fs.create(new Path(path2));
+
+ // 404 and retry, send sourceEtag as null
+ // source eTag matches -> rename should pass even when execute throws
exception
+ mockClient.renamePath(path1, path1, null, testTracingContext, null, false);
+ }
+
+ @Test
+ public void testRenameRecoverySrcDestEtagDifferent() throws IOException {
+ AzureBlobFileSystem fs = getFileSystem();
+ TracingContext testTracingContext = getTestTracingContext(fs, false);
+
+
Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext));
+
+ AbfsClient spyClient = getMockAbfsClient();
+
+ String path1 = "/dummyFile1";
+ String path2 = "/dummyFile2";
+
+ fs.create(new Path(path1));
+ fs.create(new Path(path2));
+
+ // source eTag does not match -> throw exception
+ try {
+ spyClient.renamePath(path1, path2,null, testTracingContext, null, false);
Review Comment:
Have made changes to test at the fs level.
> ABFS: Enable config controlled ETag check for Rename idempotency
> ----------------------------------------------------------------
>
> Key: HADOOP-18012
> URL: https://issues.apache.org/jira/browse/HADOOP-18012
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs/azure
> Affects Versions: 3.3.2
> Reporter: Sneha Vijayarajan
> Assignee: Sree Bhattacharyya
> Priority: Major
> Labels: pull-request-available
>
> ABFS driver has a handling for rename idempotency which relies on LMT of the
> destination file to conclude if the rename was successful or not when source
> file is absent and if the rename request had entered retry loop.
> This handling is incorrect as LMT of the destination does not change on
> rename.
> This Jira will track the change to undo the current implementation and add a
> new one where for an incoming rename operation, source file eTag is fetched
> first and then rename is done only if eTag matches for the source file.
> As this is going to be a costly operation given an extra HEAD request is
> added to each rename, this implementation will be guarded over a config and
> can enabled by customers who have workloads that do multiple renames.
> Long term plan to handle rename idempotency without HEAD request is being
> discussed.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]