commit:     a87be47f7d3245050da43d7c3ab4760d47e9fac5
Author:     gcarq <egger.m <AT> protonmail <DOT> com>
AuthorDate: Tue Feb 21 00:04:26 2023 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Tue May 23 00:22:09 2023 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=a87be47f

mergeme: Don't overwrite files if the content matches

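Use filecmp.cmp(mysrc, mydest, shallow=False) to compare the contents of
the file being merged with the file that already exists at the
destination, and do not replace the destination if the two are equal.
This results in less disk churn and helps to keep filesystem snapshots
as small as possible.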

Closes: https://bugs.gentoo.org/722270
Signed-off-by: gcarq <egger.m <AT> protonmail.com>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 lib/portage/dbapi/vartree.py | 47 +++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/lib/portage/dbapi/vartree.py b/lib/portage/dbapi/vartree.py
index a9e332a74..327b72bed 100644
--- a/lib/portage/dbapi/vartree.py
+++ b/lib/portage/dbapi/vartree.py
@@ -3,6 +3,8 @@
 
 __all__ = ["vardbapi", "vartree", "dblink"] + ["write_contents", "tar_contents"]
 
+import filecmp
+
 import portage
 
 portage.proxy.lazyimport.lazyimport(
@@ -5800,28 +5802,33 @@ class dblink:
                 # whether config protection or not, we merge the new file the
                 # same way.  Unless moveme=0 (blocking directory)
                 if moveme:
-                    # Create hardlinks only for source files that already exist
-                    # as hardlinks (having identical st_dev and st_ino).
-                    hardlink_key = (mystat.st_dev, mystat.st_ino)
+                    # only replace the existing file if it differs, see #722270
+                    already_merged = os.path.exists(mydest)
+                    if already_merged and filecmp.cmp(mysrc, mydest, shallow=False):
+                        zing = "==="
+                    else:
+                        # Create hardlinks only for source files that already exist
+                        # as hardlinks (having identical st_dev and st_ino).
+                        hardlink_key = (mystat.st_dev, mystat.st_ino)
 
-                    hardlink_candidates = self._hardlink_merge_map.get(hardlink_key)
-                    if hardlink_candidates is None:
-                        hardlink_candidates = []
-                        self._hardlink_merge_map[hardlink_key] = hardlink_candidates
+                        hardlink_candidates = self._hardlink_merge_map.get(hardlink_key)
+                        if hardlink_candidates is None:
+                            hardlink_candidates = []
+                            self._hardlink_merge_map[hardlink_key] = hardlink_candidates
 
-                    mymtime = movefile(
-                        mysrc,
-                        mydest,
-                        newmtime=thismtime,
-                        sstat=mystat,
-                        mysettings=self.settings,
-                        hardlink_candidates=hardlink_candidates,
-                        encoding=_encodings["merge"],
-                    )
-                    if mymtime is None:
-                        return 1
-                    hardlink_candidates.append(mydest)
-                    zing = ">>>"
+                        mymtime = movefile(
+                            mysrc,
+                            mydest,
+                            newmtime=thismtime,
+                            sstat=mystat,
+                            mysettings=self.settings,
+                            hardlink_candidates=hardlink_candidates,
+                            encoding=_encodings["merge"],
+                        )
+                        if mymtime is None:
+                            return 1
+                        hardlink_candidates.append(mydest)
+                        zing = ">>>"
 
                     try:
                         self._merged_path(mydest, os.lstat(mydest))

Reply via email to