branch: externals/dired-duplicates
commit 23f42d68bedb4cb8216a5896ae1acb1360b61044
Author: Harald Judt <[email protected]>
Commit: Harald Judt <[email protected]>

    Further refactor code for finding duplicates
    
    Signed-off-by: Harald Judt <[email protected]>
---
 dired-duplicates.el | 65 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/dired-duplicates.el b/dired-duplicates.el
index 031ed946a9..56a4c82aa1 100644
--- a/dired-duplicates.el
+++ b/dired-duplicates.el
@@ -187,15 +187,28 @@ temporary buffer for the hash calculation."
               (seq-remove #'file-directory-p (directory-files d t nil t))))
           directories))
 
-(defun dired-duplicates--find-and-filter-files (directories)
-  "Search below DIRECTORIES for duplicate files.
-
-It is possible to provide one or more root DIRECTORIES.  Any file
-filter functions will be applied before checking for duplicates.
-Return a hash-table with the checksums as keys and a list of size
-and duplicate files as values."
-  (cl-loop with files = (dired-duplicates--apply-file-filter-functions
-                         (dired-duplicates--find-files directories))
+(defun dired-duplicates--find-checksum-exec (file table)
+  "Return the checksum program usable at FILE's location, or nil if none.
+
+Results are cached in hash TABLE, keyed by the remote prefix of FILE."
+  (let* ((key (file-remote-p file))
+         (path (gethash key table 'does-not-exist))) ; sentinel: a cached nil is a valid result
+    (if (eq path 'does-not-exist)
+        (let* ((default-directory (file-name-directory (expand-file-name file)))
+               (exec (executable-find dired-duplicates-checksum-exec t)))
+          (setf (gethash key table) exec) ; cache failures too, so the message appears once per key
+          (unless exec ; EXEC is nil here, so report the program name that was searched for
+            (message "Checksum program %s not found in exec-path, falling back to internal routines" dired-duplicates-checksum-exec))
+          exec)
+      path)))
+
+(defun dired-duplicates--detect-duplicates (files)
+  "Find duplicates given a list of FILES.
+
+Any file filter functions will be applied before checking for
+duplicates.  Return a hash-table with the checksums as keys and a
+list of size and duplicate files as values."
+  (cl-loop with files = (dired-duplicates--apply-file-filter-functions files)
            and same-size-table = (make-hash-table)
            and checksum-table = (make-hash-table :test 'equal)
            for f in files
@@ -205,28 +218,20 @@ and duplicate files as values."
            do (setf (gethash size same-size-table)
                     (push f (gethash size same-size-table)))
            finally
-           (cl-loop with checksum-exec-availability = (make-hash-table :test 
'equal)
-                    initially do
-                    (cl-loop for d in directories do
-                             (let* ((default-directory (file-name-directory 
(expand-file-name d)))
-                                    (exec (executable-find 
dired-duplicates-checksum-exec t)))
-                               (if exec
-                                   (setf (gethash (file-remote-p d) 
checksum-exec-availability) exec)
-                                 (message "Checksum program %s not found in 
exec-path, falling back to internal routines" exec))))
-
+           (cl-loop with checksum-exec-paths = (make-hash-table :test 'equal)
                     for same-size-files being the hash-value in 
same-size-table using (hash-key size)
                     if (cdr same-size-files) do
                     (cl-loop for f in same-size-files
-                             for checksum-path = (gethash (file-remote-p f) 
checksum-exec-availability)
-                             for checksum = (if checksum-path
-                                                  
(dired-duplicates--checksum-file f checksum-path)
-                                                (if (<= size 
dired-duplicates-internal-checksumming-size-limit)
-                                                    
(dired-duplicates--checksum-file f nil)
-                                                  (warn "File %s is too big to 
checksum using internal functions, skipping." f)
-                                                  nil))
+                             for exec = (dired-duplicates--find-checksum-exec 
f checksum-exec-paths)
+                             for checksum = (if exec
+                                                
(dired-duplicates--checksum-file f exec)
+                                              (if (<= size 
dired-duplicates-internal-checksumming-size-limit)
+                                                  
(dired-duplicates--checksum-file f nil)
+                                                (warn "File %s is too big to 
checksum using internal functions, skipping." f)
+                                                nil))
                              when checksum do
-                               (setf (gethash checksum checksum-table)
-                                     (push f (gethash checksum 
checksum-table)))))
+                             (setf (gethash checksum checksum-table)
+                                   (push f (gethash checksum 
checksum-table)))))
            (cl-loop for same-files being the hash-value in checksum-table 
using (hash-key checksum)
                     do
                     (if (cdr same-files)
@@ -238,9 +243,9 @@ and duplicate files as values."
 
 (defun dired-duplicates--generate-grouped-results (&optional directories)
   "Generate a list of grouped duplicate files in DIRECTORIES."
-  (cl-loop with dupes-table = (dired-duplicates--find-and-filter-files
-                               (or directories
-                                   dired-duplicates-directories))
+  (cl-loop with directories = (or directories dired-duplicates-directories)
+           with dupes-table = (dired-duplicates--detect-duplicates
+                               (dired-duplicates--find-files directories))
            with sorted-sums = (cl-sort
                                (cl-loop for k being the hash-key in 
dupes-table using (hash-value v)
                                         collect (list k (car v)))

Reply via email to