amogh-jahagirdar commented on code in PR #11565: URL: https://github.com/apache/iceberg/pull/11565#discussion_r1878209991
########## gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java: ########## @@ -242,4 +248,106 @@ private void internalDeleteFiles(Stream<BlobId> blobIdsToDelete) { Streams.stream(Iterators.partition(blobIdsToDelete.iterator(), gcpProperties.deleteBatchSize())) .forEach(batch -> client().delete(batch)); } + + @Override + public boolean recoverFile(String path) { + Preconditions.checkArgument( + !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be null or empty"); + + try { + BlobId blobId = BlobId.fromGsUtilUri(path); + + // first attempt to restore with soft-delete + if (recoverSoftDeletedObject(blobId)) { + return true; + } + + // fallback to restoring by copying the latest version + if (recoverLatestVersion(blobId)) { + return true; + } + + } catch (IllegalArgumentException e) { + LOG.warn("Invalid GCS path format: {}", path, e); + } + + return false; + } + + /** + * Attempts to restore a soft-deleted object. + * + * <p>Requires {@code storage.objects.restore} permission + * + * <p>See <a + * href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a> + * + * @param blobId the blob identifier + * @return {@code true} if blob was recovered, {@code false} if not + */ + protected boolean recoverSoftDeletedObject(BlobId blobId) { + try { + Optional<Blob> latestSoftDeletedBlob = + client() + .list( + blobId.getBucket(), + Storage.BlobListOption.prefix(blobId.getName()), + Storage.BlobListOption.softDeleted(true)) + .streamAll() + .filter(blob -> blob.getName().equals(blobId.getName())) + .max(Comparator.comparing(Blob::getSoftDeleteTime)); + + if (latestSoftDeletedBlob.isPresent()) { + client().restore(latestSoftDeletedBlob.get().getBlobId()); + LOG.info("Soft delete object restored file {}", blobId); + return true; + } + LOG.warn("No soft deleted object was found"); + + } catch (StorageException e) { + LOG.warn("Failed to restore", e); + } + + return false; + } + + /** + * Attempts to restore the latest deleted 
object version. + * + * <p>See <a href="https://cloud.google.com/storage/docs/using-versioned-objects#restore">docs</a> + * + * @param blobId the blob identifier + * @return {@code true} if blob was recovered, {@code false} if not + */ + protected boolean recoverLatestVersion(BlobId blobId) { + try { + Optional<Blob> latestDeletedVersion = + client() + .list( + blobId.getBucket(), + Storage.BlobListOption.prefix(blobId.getName()), + Storage.BlobListOption.versions(true)) + .streamAll() + .filter(blob -> blob.getName().equals(blobId.getName())) + .max(Comparator.comparing(Blob::getUpdateTimeOffsetDateTime)) + .filter(blob -> blob.getDeleteTimeOffsetDateTime() != null); + + if (latestDeletedVersion.isPresent()) { + Storage.CopyRequest copyRequest = + Storage.CopyRequest.newBuilder() + .setSource(latestDeletedVersion.get().getBlobId()) + .setTarget(blobId) + .build(); + Blob blob = client().copy(copyRequest).getResult(); + LOG.info("Latest deleted version was restored for {}", blob.getBlobId()); + return true; + } + LOG.warn("No latest deleted version was found"); + + } catch (StorageException e) { + LOG.warn("Failed to restore latest deleted version", e); Review Comment: Same as above ########## gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java: ########## @@ -242,4 +248,106 @@ private void internalDeleteFiles(Stream<BlobId> blobIdsToDelete) { Streams.stream(Iterators.partition(blobIdsToDelete.iterator(), gcpProperties.deleteBatchSize())) .forEach(batch -> client().delete(batch)); } + + @Override + public boolean recoverFile(String path) { + Preconditions.checkArgument( + !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be null or empty"); + + try { + BlobId blobId = BlobId.fromGsUtilUri(path); + + // first attempt to restore with soft-delete + if (recoverSoftDeletedObject(blobId)) { + return true; + } + + // fallback to restoring by copying the latest version + if (recoverLatestVersion(blobId)) { + return true; + } + + } catch 
(IllegalArgumentException e) { + LOG.warn("Invalid GCS path format: {}", path, e); + } + + return false; + } + + /** + * Attempts to restore a soft-deleted object. + * + * <p>Requires {@code storage.objects.restore} permission + * + * <p>See <a + * href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a> + * + * @param blobId the blob identifier + * @return {@code true} if blob was recovered, {@code false} if not + */ + protected boolean recoverSoftDeletedObject(BlobId blobId) { + try { + Optional<Blob> latestSoftDeletedBlob = + client() + .list( + blobId.getBucket(), + Storage.BlobListOption.prefix(blobId.getName()), + Storage.BlobListOption.softDeleted(true)) + .streamAll() + .filter(blob -> blob.getName().equals(blobId.getName())) + .max(Comparator.comparing(Blob::getSoftDeleteTime)); + + if (latestSoftDeletedBlob.isPresent()) { + client().restore(latestSoftDeletedBlob.get().getBlobId()); + LOG.info("Soft delete object restored file {}", blobId); + return true; + } + LOG.warn("No soft deleted object was found"); + + } catch (StorageException e) { + LOG.warn("Failed to restore", e); + } + + return false; + } + + /** + * Attempts to restore the latest deleted object version. 
+ * + * <p>See <a href="https://cloud.google.com/storage/docs/using-versioned-objects#restore">docs</a> + * + * @param blobId the blob identifier + * @return {@code true} if blob was recovered, {@code false} if not + */ + protected boolean recoverLatestVersion(BlobId blobId) { + try { + Optional<Blob> latestDeletedVersion = + client() + .list( + blobId.getBucket(), + Storage.BlobListOption.prefix(blobId.getName()), + Storage.BlobListOption.versions(true)) + .streamAll() + .filter(blob -> blob.getName().equals(blobId.getName())) + .max(Comparator.comparing(Blob::getUpdateTimeOffsetDateTime)) + .filter(blob -> blob.getDeleteTimeOffsetDateTime() != null); + + if (latestDeletedVersion.isPresent()) { + Storage.CopyRequest copyRequest = + Storage.CopyRequest.newBuilder() + .setSource(latestDeletedVersion.get().getBlobId()) + .setTarget(blobId) + .build(); + Blob blob = client().copy(copyRequest).getResult(); + LOG.info("Latest deleted version was restored for {}", blob.getBlobId()); + return true; + } + LOG.warn("No latest deleted version was found"); Review Comment: Could we include the blobID in the log? 
########## gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java: ########## @@ -242,4 +248,106 @@ private void internalDeleteFiles(Stream<BlobId> blobIdsToDelete) { Streams.stream(Iterators.partition(blobIdsToDelete.iterator(), gcpProperties.deleteBatchSize())) .forEach(batch -> client().delete(batch)); } + + @Override + public boolean recoverFile(String path) { + Preconditions.checkArgument( + !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be null or empty"); + + try { + BlobId blobId = BlobId.fromGsUtilUri(path); + + // first attempt to restore with soft-delete + if (recoverSoftDeletedObject(blobId)) { + return true; + } + + // fallback to restoring by copying the latest version + if (recoverLatestVersion(blobId)) { + return true; + } + + } catch (IllegalArgumentException e) { + LOG.warn("Invalid GCS path format: {}", path, e); + } + + return false; + } + + /** + * Attempts to restore a soft-deleted object. + * + * <p>Requires {@code storage.objects.restore} permission + * + * <p>See <a + * href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a> + * + * @param blobId the blob identifier + * @return {@code true} if blob was recovered, {@code false} if not + */ + protected boolean recoverSoftDeletedObject(BlobId blobId) { + try { + Optional<Blob> latestSoftDeletedBlob = + client() + .list( + blobId.getBucket(), + Storage.BlobListOption.prefix(blobId.getName()), + Storage.BlobListOption.softDeleted(true)) + .streamAll() + .filter(blob -> blob.getName().equals(blobId.getName())) + .max(Comparator.comparing(Blob::getSoftDeleteTime)); + + if (latestSoftDeletedBlob.isPresent()) { + client().restore(latestSoftDeletedBlob.get().getBlobId()); + LOG.info("Soft delete object restored file {}", blobId); + return true; + } + LOG.warn("No soft deleted object was found"); + + } catch (StorageException e) { + LOG.warn("Failed to restore", e); Review Comment: Same as below, could we include the blobID in these log messages? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org For additional commands, e-mail: issues-help@iceberg.apache.org