amogh-jahagirdar commented on code in PR #11565: URL: https://github.com/apache/iceberg/pull/11565#discussion_r1857172654
##########
gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java:
##########
@@ -242,4 +250,116 @@ private void internalDeleteFiles(Stream<BlobId> blobIdsToDelete) {
     Streams.stream(Iterators.partition(blobIdsToDelete.iterator(), gcpProperties.deleteBatchSize()))
         .forEach(batch -> client().delete(batch));
   }
+
+  @Override
+  public boolean recoverFile(String path) {
+    Preconditions.checkArgument(
+        !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be null or empty");
+
+    try {
+      BlobId blobId = BlobId.fromGsUtilUri(path);
+
+      // first attempt to restore with soft-delete
+      if (recoverSoftDeletedObject(blobId)) {
+        return true;
+      }
+
+      // fallback to restoring by copying the latest version
+      if (recoverLatestVersion(blobId)) {
+        return true;
+      }
+
+    } catch (IllegalArgumentException e) {
+      LOG.warn("Invalid GCS path format: {}", path, e);
+    }
+
+    return false;
+  }
+
+  /**
+   * Attempts to restore a soft-deleted object.
+   *
+   * <p>Requires {@code storage.objects.restore} permission
+   *
+   * <p>See <a
+   * href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a>
+   *
+   * @param blobId the blob identifier
+   * @return {@code true} if blob was recovered, {@code false} if not
+   */
+  protected boolean recoverSoftDeletedObject(BlobId blobId) {
+    try {
+      BucketInfo.SoftDeletePolicy policy = client().get(blobId.getBucket()).getSoftDeletePolicy();
+      if (Duration.ofSeconds(0).equals(policy.getRetentionDuration())) {
+        LOG.warn("Soft delete is disabled for {}", blobId.getBucket());
+        return false;
+      }
+
+      Optional<Blob> latestSoftDeletedBlob =
+          client()
+              .list(
+                  blobId.getBucket(),
+                  Storage.BlobListOption.prefix(blobId.getName()),
+                  Storage.BlobListOption.softDeleted(true))
+              .streamAll()
+              .filter(blob -> blob.getName().equals(blobId.getName()))
+              .max(Comparator.comparing(Blob::getSoftDeleteTime));
+
+      if (latestSoftDeletedBlob.isPresent()) {
+        client().restore(latestSoftDeletedBlob.get().getBlobId());
+        LOG.info("Soft delete object restored file {}", blobId);
+        return true;
+      }
+      LOG.warn("No soft deleted object was found");
+
+    } catch (StorageException e) {
+      LOG.warn("Failed to restore", e);
+    }
+
+    return false;
+  }
+
+  /**
+   * Attempts to restore the latest deleted object version.
+   *
+   * <p>See <a href="https://cloud.google.com/storage/docs/using-versioned-objects#restore">docs</a>
+   *
+   * @param blobId the blob identifier
+   * @return {@code true} if blob was recovered, {@code false} if not
+   */
+  protected boolean recoverLatestVersion(BlobId blobId) {
+    try {
+      if (!client().get(blobId.getBucket()).versioningEnabled()) {
+        LOG.warn("Object versioning is disabled for {}", blobId.getBucket());
+        return false;
+      }
+
+      Optional<Blob> latestVersion =
+          client()
+              .list(
+                  blobId.getBucket(),
+                  Storage.BlobListOption.prefix(blobId.getName()),
+                  Storage.BlobListOption.versions(true))
+              .streamAll()
+              .filter(blob -> blob.getName().equals(blobId.getName()))
+              .max(Comparator.comparing(Blob::getUpdateTimeOffsetDateTime));
+
+      if (latestVersion.isPresent() && latestVersion.get().getDeleteTimeOffsetDateTime() != null) {

Review Comment:
   Could we embed the `latestVersion.getDeleteTimeOffsetDateTime` check in the filter above? Then it's the latestVersionBeforeDeletion


--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org