amogh-jahagirdar commented on code in PR #11565:
URL: https://github.com/apache/iceberg/pull/11565#discussion_r1857172654


##########
gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java:
##########
@@ -242,4 +250,116 @@ private void internalDeleteFiles(Stream<BlobId> 
blobIdsToDelete) {
     Streams.stream(Iterators.partition(blobIdsToDelete.iterator(), 
gcpProperties.deleteBatchSize()))
         .forEach(batch -> client().delete(batch));
   }
+
+  @Override
+  public boolean recoverFile(String path) {
+    Preconditions.checkArgument(
+        !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be 
null or empty");
+
+    try {
+      BlobId blobId = BlobId.fromGsUtilUri(path);
+
+      // first attempt to restore with soft-delete
+      if (recoverSoftDeletedObject(blobId)) {
+        return true;
+      }
+
+      // fallback to restoring by copying the latest version
+      if (recoverLatestVersion(blobId)) {
+        return true;
+      }
+
+    } catch (IllegalArgumentException e) {
+      LOG.warn("Invalid GCS path format: {}", path, e);
+    }
+
+    return false;
+  }
+
+  /**
+   * Attempts to restore a soft-deleted object.
+   *
+   * <p>Requires {@code storage.objects.restore} permission
+   *
+   * <p>See <a
+   * 
href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a>
+   *
+   * @param blobId the blob identifier
+   * @return {@code true} if blob was recovered, {@code false} if not
+   */
+  protected boolean recoverSoftDeletedObject(BlobId blobId) {
+    try {
+      BucketInfo.SoftDeletePolicy policy = 
client().get(blobId.getBucket()).getSoftDeletePolicy();
+      if (Duration.ofSeconds(0).equals(policy.getRetentionDuration())) {
+        LOG.warn("Soft delete is disabled for {}", blobId.getBucket());
+        return false;
+      }
+
+      Optional<Blob> latestSoftDeletedBlob =
+          client()
+              .list(
+                  blobId.getBucket(),
+                  Storage.BlobListOption.prefix(blobId.getName()),
+                  Storage.BlobListOption.softDeleted(true))
+              .streamAll()
+              .filter(blob -> blob.getName().equals(blobId.getName()))
+              .max(Comparator.comparing(Blob::getSoftDeleteTime));
+
+      if (latestSoftDeletedBlob.isPresent()) {
+        client().restore(latestSoftDeletedBlob.get().getBlobId());
+        LOG.info("Soft delete object restored file {}", blobId);
+        return true;
+      }
+      LOG.warn("No soft deleted object was found");
+
+    } catch (StorageException e) {
+      LOG.warn("Failed to restore", e);
+    }
+
+    return false;
+  }
+
+  /**
+   * Attempts to restore the latest deleted object version.
+   *
+   * <p>See <a 
href="https://cloud.google.com/storage/docs/using-versioned-objects#restore">docs</a>
+   *
+   * @param blobId the blob identifier
+   * @return {@code true} if blob was recovered, {@code false} if not
+   */
+  protected boolean recoverLatestVersion(BlobId blobId) {
+    try {
+      if (!client().get(blobId.getBucket()).versioningEnabled()) {
+        LOG.warn("Object versioning is disabled for {}", blobId.getBucket());
+        return false;
+      }
+
+      Optional<Blob> latestVersion =
+          client()
+              .list(
+                  blobId.getBucket(),
+                  Storage.BlobListOption.prefix(blobId.getName()),
+                  Storage.BlobListOption.versions(true))
+              .streamAll()
+              .filter(blob -> blob.getName().equals(blobId.getName()))
+              .max(Comparator.comparing(Blob::getUpdateTimeOffsetDateTime));
+
+      if (latestVersion.isPresent() && 
latestVersion.get().getDeleteTimeOffsetDateTime() != null) {

Review Comment:
   Could we embed the `latestVersion.getDeleteTimeOffsetDateTime` check in the
filter above? Then the variable could be named `latestVersionBeforeDeletion` or
`latestVersionBeforeRemoval`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to