paveon commented on code in PR #1133:
URL: https://github.com/apache/iceberg-go/pull/1133#discussion_r3328876927


##########
table/orphan_cleanup.go:
##########
@@ -195,24 +202,62 @@ func (t Table) executeOrphanCleanup(ctx context.Context, 
cfg *orphanCleanupConfi
                scanLocation = t.metadata.Location()
        }
 
-       referencedFiles, err := t.getReferencedFiles(fs, true)
-       if err != nil {
-               return OrphanCleanupResult{}, fmt.Errorf("failed to get 
referenced files: %w", err)
+       // Run the S3 walk and referenced-file collection concurrently.
+       var referencedFiles map[string]bool
+       var scannedFiles []scannedFile
+
+       g, gctx := errgroup.WithContext(ctx)
+       g.Go(func() error {
+               var err error
+               referencedFiles, err = t.getReferencedFiles(gctx, fs, 
cfg.maxConcurrency, true)
+
+               return err
+       })
+
+       g.Go(func() error {
+               cutoff := time.Now().Add(-cfg.olderThan)
+
+               return walkDirectory(fs, scanLocation, func(path string, info 
stdfs.FileInfo) error {
+                       if gctx.Err() != nil {
+                               return gctx.Err()
+                       }
+                       if info.IsDir() || !info.ModTime().Before(cutoff) {
+                               return nil
+                       }
+                       scannedFiles = append(scannedFiles, scannedFile{path: 
path, size: info.Size()})

Review Comment:
   Good point, added a short comment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to