commit:     ced2e6d4f4ac95b8e17cf7dae964a64037a85bf0
Author:     Alexey Gladkov <legion <AT> kernel <DOT> org>
AuthorDate: Mon Mar 11 17:09:05 2024 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Apr 28 00:04:09 2024 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=ced2e6d4

sync/zipfile: Recycle files that have not changed

We can check whether the content of files from the archive differs from
the current revision. This will give us several advantages:

* This will give us some meaning to the mtime of files, since it will
prevent the timestamps of unmodified files from being changed.

* This will also get rid of recreating self.repo.location, which will
allow sync with FEATURES=usersync because self.repo.location is reused.

Suggested-by: Zac Medico <zmedico <AT> gentoo.org>
Signed-off-by: Alexey Gladkov <legion <AT> kernel.org>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 lib/portage/sync/modules/zipfile/zipfile.py | 32 ++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py b/lib/portage/sync/modules/zipfile/zipfile.py
index 3cd210a64b..edfb5aa681 100644
--- a/lib/portage/sync/modules/zipfile/zipfile.py
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -35,6 +35,16 @@ class ZipFile(SyncBase):
             return (os.EX_OK, info["etag"][0])
         return (1, False)
 
+    def _do_cmp(self, f1, f2):
+        bufsize = 8 * 1024
+        while True:
+            b1 = f1.read(bufsize)
+            b2 = f2.read(bufsize)
+            if b1 != b2:
+                return False
+            if not b1:
+                return True
+
     def sync(self, **kwargs):
         """Sync the repository"""
         if kwargs:
@@ -76,7 +86,15 @@ class ZipFile(SyncBase):
             return (1, False)
 
         # Drop previous tree
-        shutil.rmtree(self.repo.location)
+        tempdir = tempfile.mkdtemp(prefix=".temp", dir=self.repo.location)
+        tmpname = os.path.basename(tempdir)
+
+        for name in os.listdir(self.repo.location):
+            if name != tmpname:
+                os.rename(
+                    os.path.join(self.repo.location, name),
+                    os.path.join(tempdir, name),
+                )
 
         with zipfile.ZipFile(zip_file) as archive:
             strip_comp = 0
@@ -101,9 +119,21 @@ class ZipFile(SyncBase):
                     continue
 
                 with archive.open(n) as srcfile:
+                    prvpath = os.path.join(tempdir, *parts[strip_comp:])
+
+                    if os.path.exists(prvpath):
+                        with open(prvpath, "rb") as prvfile:
+                            if self._do_cmp(prvfile, srcfile):
+                                os.rename(prvpath, dstpath)
+                                continue
+                        srcfile.seek(0)
+
                     with open(dstpath, "wb") as dstfile:
                         shutil.copyfileobj(srcfile, dstfile)
 
+        # Drop previous tree
+        shutil.rmtree(tempdir)
+
         with open(os.path.join(self.repo.location, ".info"), "w") as infofile:
             if etag:
                 infofile.write(f"etag {etag}\n")

Reply via email to