amogh-jahagirdar commented on code in PR #11565: URL: https://github.com/apache/iceberg/pull/11565#discussion_r1857166857
########## gcp/src/test/java/org/apache/iceberg/gcp/gcs/GCSFileIOTest.java: ########## @@ -270,4 +283,136 @@ public void refreshCredentialsEndpointSetButRefreshDisabled() { assertThat(client.getOptions().getCredentials()).isInstanceOf(OAuth2Credentials.class); } + + @Test + void recoverSoftDeletedObject_WhenSoftDeleteDisabled_ReturnsFalse() { + BlobId blobId = BlobId.of(TEST_BUCKET, "object"); + Bucket bucket = mock(Bucket.class); + BucketInfo.SoftDeletePolicy policy = mock(BucketInfo.SoftDeletePolicy.class); + + when(storage.get(TEST_BUCKET)).thenReturn(bucket); + when(bucket.getSoftDeletePolicy()).thenReturn(policy); + when(policy.getRetentionDuration()).thenReturn(Duration.ofSeconds(0)); // soft-delete disabled + + assertThat(io.recoverSoftDeletedObject(blobId)).isFalse(); + } + + @Test + void recoverSoftDeletedObject_WhenSoftDeletedBlobExists_ReturnsTrue() { Review Comment: I feel like we can shrink the method names here and remove the underscore separators as part of that, just so it fits the existing naming pattern in these tests. e.g. `recoverSoftDeletedObject` ########## gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java: ########## @@ -242,4 +250,116 @@ private void internalDeleteFiles(Stream<BlobId> blobIdsToDelete) { Streams.stream(Iterators.partition(blobIdsToDelete.iterator(), gcpProperties.deleteBatchSize())) .forEach(batch -> client().delete(batch)); } + + @Override + public boolean recoverFile(String path) { + Preconditions.checkArgument( + !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be null or empty"); + + try { + BlobId blobId = BlobId.fromGsUtilUri(path); + + // first attempt to restore with soft-delete + if (recoverSoftDeletedObject(blobId)) { + return true; + } + + // fallback to restoring by copying the latest version + if (recoverLatestVersion(blobId)) { + return true; + } + + } catch (IllegalArgumentException e) { + LOG.warn("Invalid GCS path format: {}", path, e); + } + + return false; + } + + /** + * Attempts to restore a soft-deleted object. + * + * <p>Requires {@code storage.objects.restore} permission + * + * <p>See <a + * href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a> + * + * @param blobId the blob identifier + * @return {@code true} if blob was recovered, {@code false} if not + */ + protected boolean recoverSoftDeletedObject(BlobId blobId) { + try { + BucketInfo.SoftDeletePolicy policy = client().get(blobId.getBucket()).getSoftDeletePolicy(); + if (Duration.ofSeconds(0).equals(policy.getRetentionDuration())) { Review Comment: Actually do we need the soft delete policy check to begin with? Is it not possible to jump directly to attempting the listing of versions with softDeleted(true) as done below, and if nothing gets returned we know that recovering via soft delete wasn't effective? Or will the list versions call fail? ########## gcp/src/test/java/org/apache/iceberg/gcp/gcs/GCSFileIOTest.java: ########## @@ -270,4 +283,136 @@ public void refreshCredentialsEndpointSetButRefreshDisabled() { assertThat(client.getOptions().getCredentials()).isInstanceOf(OAuth2Credentials.class); } + + @Test + void recoverSoftDeletedObject_WhenSoftDeleteDisabled_ReturnsFalse() { + BlobId blobId = BlobId.of(TEST_BUCKET, "object"); + Bucket bucket = mock(Bucket.class); + BucketInfo.SoftDeletePolicy policy = mock(BucketInfo.SoftDeletePolicy.class); + + when(storage.get(TEST_BUCKET)).thenReturn(bucket); + when(bucket.getSoftDeletePolicy()).thenReturn(policy); + when(policy.getRetentionDuration()).thenReturn(Duration.ofSeconds(0)); // soft-delete disabled + + assertThat(io.recoverSoftDeletedObject(blobId)).isFalse(); + } + + @Test + void recoverSoftDeletedObject_WhenSoftDeletedBlobExists_ReturnsTrue() { + BlobId blobId = BlobId.of(TEST_BUCKET, "object"); + Bucket bucket = mock(Bucket.class); + BucketInfo.SoftDeletePolicy policy = mock(BucketInfo.SoftDeletePolicy.class); + Page<Blob> page = mock(Page.class); + Blob softDeletedBlob = mock(Blob.class); + + when(storage.get(TEST_BUCKET)).thenReturn(bucket); + when(bucket.getSoftDeletePolicy()).thenReturn(policy); + when(policy.getRetentionDuration()).thenReturn(Duration.ofDays(7)); // soft-delete enabled + doAnswer(invoke -> page).when(storage).list(eq(TEST_BUCKET), any(), any()); + when(page.streamAll()).thenReturn(Stream.of(softDeletedBlob)); + when(softDeletedBlob.getName()).thenReturn("object"); + when(softDeletedBlob.getSoftDeleteTime()).thenReturn(OffsetDateTime.now()); + when(softDeletedBlob.getBlobId()).thenReturn(blobId); + doAnswer(invocation -> softDeletedBlob).when(storage).restore(any(), any()); + + assertThat(io.recoverSoftDeletedObject(blobId)).isTrue(); + } + + @Test + void recoverSoftDeletedObject_WhenNoSoftDeletedBlobExists_ReturnsFalse() { Review Comment: `cannotRecoverSoftDeletedObjectWhenNonExistant` ########## gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java: ########## @@ -242,4 +250,116 @@ private void internalDeleteFiles(Stream<BlobId> blobIdsToDelete) { Streams.stream(Iterators.partition(blobIdsToDelete.iterator(), gcpProperties.deleteBatchSize())) .forEach(batch -> client().delete(batch)); } + + @Override + public boolean recoverFile(String path) { + Preconditions.checkArgument( + !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be null or empty"); + + try { + BlobId blobId = BlobId.fromGsUtilUri(path); + + // first attempt to restore with soft-delete + if (recoverSoftDeletedObject(blobId)) { + return true; + } + + // fallback to restoring by copying the latest version + if (recoverLatestVersion(blobId)) { + return true; + } + + } catch (IllegalArgumentException e) { + LOG.warn("Invalid GCS path format: {}", path, e); + } + + return false; + } + + /** + * Attempts to restore a soft-deleted object. + * + * <p>Requires {@code storage.objects.restore} permission + * + * <p>See <a + * href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a> + * + * @param blobId the blob identifier + * @return {@code true} if blob was recovered, {@code false} if not + */ + protected boolean recoverSoftDeletedObject(BlobId blobId) { + try { + BucketInfo.SoftDeletePolicy policy = client().get(blobId.getBucket()).getSoftDeletePolicy(); + if (Duration.ofSeconds(0).equals(policy.getRetentionDuration())) { Review Comment: I think we'll need a null check for policy == null before so I think it should be ``` if (policy == null || Duration.ofSeconds(0).equals(policy.getRetentionDuration())) { return null } ``` The GCP sdk https://github.com/googleapis/google-api-java-client-services/blob/350652f6480c1dddf1c7dc2a966434deaeaa4964/clients/google-api-services-storage/v1/2.0.0/com/google/api/services/storage/model/Bucket.java#L911 API docs indicate it's that the soft delete policy will be null in case it doesn't exist. Looks like Beam does the same check (inverted) as well https://github.com/apache/beam/blob/78630d15445fdf54935d6ba99c2fd9d0930b6af8/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcpOptions.java#L419 ########## gcp/src/test/java/org/apache/iceberg/gcp/gcs/GCSFileIOTest.java: ########## @@ -270,4 +283,136 @@ public void refreshCredentialsEndpointSetButRefreshDisabled() { assertThat(client.getOptions().getCredentials()).isInstanceOf(OAuth2Credentials.class); } + + @Test + void recoverSoftDeletedObject_WhenSoftDeleteDisabled_ReturnsFalse() { + BlobId blobId = BlobId.of(TEST_BUCKET, "object"); + Bucket bucket = mock(Bucket.class); + BucketInfo.SoftDeletePolicy policy = mock(BucketInfo.SoftDeletePolicy.class); + + when(storage.get(TEST_BUCKET)).thenReturn(bucket); + when(bucket.getSoftDeletePolicy()).thenReturn(policy); + when(policy.getRetentionDuration()).thenReturn(Duration.ofSeconds(0)); // soft-delete disabled + + assertThat(io.recoverSoftDeletedObject(blobId)).isFalse(); + } + + @Test + void recoverSoftDeletedObject_WhenSoftDeletedBlobExists_ReturnsTrue() { + BlobId blobId = BlobId.of(TEST_BUCKET, "object"); + Bucket bucket = mock(Bucket.class); + BucketInfo.SoftDeletePolicy policy = mock(BucketInfo.SoftDeletePolicy.class); + Page<Blob> page = mock(Page.class); + Blob softDeletedBlob = mock(Blob.class); + + when(storage.get(TEST_BUCKET)).thenReturn(bucket); + when(bucket.getSoftDeletePolicy()).thenReturn(policy); + when(policy.getRetentionDuration()).thenReturn(Duration.ofDays(7)); // soft-delete enabled + doAnswer(invoke -> page).when(storage).list(eq(TEST_BUCKET), any(), any()); + when(page.streamAll()).thenReturn(Stream.of(softDeletedBlob)); + when(softDeletedBlob.getName()).thenReturn("object"); + when(softDeletedBlob.getSoftDeleteTime()).thenReturn(OffsetDateTime.now()); + when(softDeletedBlob.getBlobId()).thenReturn(blobId); + doAnswer(invocation -> softDeletedBlob).when(storage).restore(any(), any()); + + assertThat(io.recoverSoftDeletedObject(blobId)).isTrue(); + } + + @Test + void recoverSoftDeletedObject_WhenNoSoftDeletedBlobExists_ReturnsFalse() { + BlobId blobId = BlobId.of(TEST_BUCKET, "object"); + Bucket bucket = mock(Bucket.class); + BucketInfo.SoftDeletePolicy policy = mock(BucketInfo.SoftDeletePolicy.class); + Page<Blob> page = mock(Page.class); + + when(storage.get(TEST_BUCKET)).thenReturn(bucket); + when(bucket.getSoftDeletePolicy()).thenReturn(policy); + when(policy.getRetentionDuration()).thenReturn(Duration.ofDays(7)); // soft-delete enabled + when(page.streamAll()).thenReturn(Stream.empty()); + + assertThat(io.recoverSoftDeletedObject(blobId)).isFalse(); + } + + @Test + void recoverSoftDelete_WhenStorageException_ReturnsFalse() { Review Comment: `cannotRecoverSoftDeletedWhenStorageExceptionThrown` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org