paveon commented on code in PR #1133:
URL: https://github.com/apache/iceberg-go/pull/1133#discussion_r3328876927
##########
table/orphan_cleanup.go:
##########
@@ -195,24 +202,62 @@ func (t Table) executeOrphanCleanup(ctx context.Context,
cfg *orphanCleanupConfi
scanLocation = t.metadata.Location()
}
- referencedFiles, err := t.getReferencedFiles(fs, true)
- if err != nil {
- return OrphanCleanupResult{}, fmt.Errorf("failed to get
referenced files: %w", err)
+ // Run the S3 walk and referenced-file collection concurrently.
+ var referencedFiles map[string]bool
+ var scannedFiles []scannedFile
+
+ g, gctx := errgroup.WithContext(ctx)
+ g.Go(func() error {
+ var err error
+ referencedFiles, err = t.getReferencedFiles(gctx, fs,
cfg.maxConcurrency, true)
+
+ return err
+ })
+
+ g.Go(func() error {
+ cutoff := time.Now().Add(-cfg.olderThan)
+
+ return walkDirectory(fs, scanLocation, func(path string, info
stdfs.FileInfo) error {
+ if gctx.Err() != nil {
+ return gctx.Err()
+ }
+ if info.IsDir() || !info.ModTime().Before(cutoff) {
+ return nil
+ }
+ scannedFiles = append(scannedFiles, scannedFile{path:
path, size: info.Size()})
Review Comment:
Good point, I added a short comment.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]