commit:     0b7eda500a0dcb98a67f33bf9ef25b202b358986
Author:     Arfrever Frehtes Taifersar Arahesis <Arfrever <AT> Apache <DOT> Org>
AuthorDate: Wed Aug  7 17:06:11 2019 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sun Aug 11 19:11:47 2019 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=0b7eda50

dblink._collision_protect: Detect internal collisions.

Implement detection of internal collisions (between files of the same package,
located in separate directories in the installation image (${D}) corresponding
to merged directories in the target filesystem (${ROOT})).

This provides protection against overwriting some files when performing merging
of files from ${D} to ${ROOT} in some filesystem layouts (such as /-merged layout
or /usr-merged layout).

Internal collisions between identical files are silently ignored.

Bug: https://bugs.gentoo.org/690484
Signed-off-by: Arfrever Frehtes Taifersar Arahesis <Arfrever <AT> Apache.Org>
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>

 lib/portage/dbapi/vartree.py       |  55 ++++++++++++++++++--
 lib/portage/util/_compare_files.py | 103 +++++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+), 4 deletions(-)

diff --git a/lib/portage/dbapi/vartree.py b/lib/portage/dbapi/vartree.py
index e2fce7736..4f069474b 100644
--- a/lib/portage/dbapi/vartree.py
+++ b/lib/portage/dbapi/vartree.py
@@ -30,6 +30,7 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.util:apply_secpass_permissions,ConfigProtect,ensure_dirs,' + \
                
'writemsg,writemsg_level,write_atomic,atomic_ofstream,writedict,' + \
                'grabdict,normalize_path,new_protect_filename',
+       'portage.util._compare_files:compare_files',
        'portage.util.digraph:digraph',
        'portage.util.env_update:env_update',
        'portage.util.install_mask:install_mask_dir,InstallMask',
@@ -87,6 +88,7 @@ import io
 from itertools import chain
 import logging
 import os as _os
+import operator
 import platform
 import pwd
 import re
@@ -3418,6 +3420,8 @@ class dblink(object):
 
                        os = _os_merge
 
+                       real_relative_paths = {}
+
                        collision_ignore = []
                        for x in portage.util.shlex_split(
                                self.settings.get("COLLISION_IGNORE", "")):
@@ -3469,8 +3473,13 @@ class dblink(object):
                                        previous = current
                                        progress_shown = True
 
-                               dest_path = normalize_path(
-                                       os.path.join(destroot, 
f.lstrip(os.path.sep)))
+                               dest_path = 
normalize_path(os.path.join(destroot, f.lstrip(os.path.sep)))
+
+                               # Relative path with symbolic links resolved 
only in parent directories
+                               real_relative_path = 
os.path.join(os.path.realpath(os.path.dirname(dest_path)),
+                                       
os.path.basename(dest_path))[len(destroot):]
+
+                               
real_relative_paths.setdefault(real_relative_path, 
[]).append(f.lstrip(os.path.sep))
 
                                parent = os.path.dirname(dest_path)
                                if parent not in dirs:
@@ -3556,9 +3565,24 @@ class dblink(object):
                                                        break
                                        if stopmerge:
                                                collisions.append(f)
+
+                       internal_collisions = {}
+                       for real_relative_path, files in 
real_relative_paths.items():
+                               # Detect internal collisions between 
non-identical files.
+                               if len(files) >= 2:
+                                       files.sort()
+                                       for i in range(len(files) - 1):
+                                               file1 = 
normalize_path(os.path.join(srcroot, files[i]))
+                                               file2 = 
normalize_path(os.path.join(srcroot, files[i+1]))
+                                               # Compare files, ignoring 
differences in times.
+                                               differences = 
compare_files(file1, file2, skipped_types=("atime", "mtime", "ctime"))
+                                               if differences:
+                                                       
internal_collisions.setdefault(real_relative_path, {})[(files[i], files[i+1])] 
= differences
+
                        if progress_shown:
                                showMessage(_("100% done\n"))
-                       return collisions, dirs_ro, symlink_collisions, 
plib_collisions
+
+                       return collisions, internal_collisions, dirs_ro, 
symlink_collisions, plib_collisions
 
        def _lstat_inode_map(self, path_iter):
                """
@@ -4081,7 +4105,7 @@ class dblink(object):
                        if blocker.exists():
                                blockers.append(blocker)
 
-               collisions, dirs_ro, symlink_collisions, plib_collisions = \
+               collisions, internal_collisions, dirs_ro, symlink_collisions, 
plib_collisions = \
                        self._collision_protect(srcroot, destroot,
                        others_in_slot + blockers, filelist, linklist)
 
@@ -4109,6 +4133,29 @@ class dblink(object):
                        eerror(msg)
                        return 1
 
+               if internal_collisions:
+                       msg = _("Package '%s' has internal collisions between 
non-identical files "
+                               "(located in separate directories in the 
installation image (${D}) "
+                               "corresponding to merged directories in the 
target "
+                               "filesystem (${ROOT})):") % self.settings.mycpv
+                       msg = textwrap.wrap(msg, 70)
+                       msg.append("")
+                       for k, v in sorted(internal_collisions.items(), 
key=operator.itemgetter(0)):
+                               msg.append("\t%s" % os.path.join(destroot, 
k.lstrip(os.path.sep)))
+                               for (file1, file2), differences in 
sorted(v.items()):
+                                       msg.append("\t\t%s" % 
os.path.join(destroot, file1.lstrip(os.path.sep)))
+                                       msg.append("\t\t%s" % 
os.path.join(destroot, file2.lstrip(os.path.sep)))
+                                       msg.append("\t\t\tDifferences: %s" % ", 
".join(differences))
+                                       msg.append("")
+                       self._elog("eerror", "preinst", msg)
+
+                       msg = _("Package '%s' NOT merged due to internal 
collisions "
+                               "between non-identical files.") % 
self.settings.mycpv
+                       msg += _(" If necessary, refer to your elog messages 
for the whole "
+                               "content of the above message.")
+                       eerror(textwrap.wrap(msg, 70))
+                       return 1
+
                if symlink_collisions:
                        # Symlink collisions need to be distinguished from 
other types
                        # of collisions, in order to avoid confusion (see bug 
#409359).

diff --git a/lib/portage/util/_compare_files.py 
b/lib/portage/util/_compare_files.py
new file mode 100644
index 000000000..bd993e501
--- /dev/null
+++ b/lib/portage/util/_compare_files.py
@@ -0,0 +1,103 @@
+# Copyright 2019 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+__all__ = ["compare_files"]
+
+import io
+import os
+import stat
+import sys
+
+from portage import _encodings
+from portage import _unicode_encode
+from portage.util._xattr import xattr
+
+def compare_files(file1, file2, skipped_types=()):
+       """
+       Compare metadata and contents of two files.
+
+       Neither path is dereferenced: both are examined with lstat(), and the
+       content of a symbolic link is its target as returned by readlink().
+       Two paths referring to the same inode (equal st_dev and st_ino) are
+       reported as identical without any further checks.
+
+       Content is read only when no "type" difference has been recorded and
+       both sizes are equal. Unequal sizes are reported as "size" and
+       "content" (each unless skipped) without reading the files.
+
+       @param file1: File 1
+       @type file1: str
+       @param file2: File 2
+       @type file2: str
+       @param skipped_types: Tuple of strings specifying types of properties excluded from comparison.
+               Supported strings: type, mode, owner, group, device_number, xattr, atime, mtime, ctime, size, content
+       @type skipped_types: tuple of str
+       @rtype: tuple of str
+       @return: Tuple of strings specifying types of properties different between compared files
+       """
+
+       # Encode each path once; the encoded form is reused for lstat(),
+       # readlink() and open() below.
+       file1_encoded = _unicode_encode(file1, encoding=_encodings["fs"], errors="strict")
+       file1_stat = os.lstat(file1_encoded)
+       file2_encoded = _unicode_encode(file2, encoding=_encodings["fs"], errors="strict")
+       file2_stat = os.lstat(file2_encoded)
+
+       differences = []
+
+       # Same device and inode: both paths name the same file.
+       if (file1_stat.st_dev, file1_stat.st_ino) == (file2_stat.st_dev, file2_stat.st_ino):
+               return ()
+
+       if "type" not in skipped_types and stat.S_IFMT(file1_stat.st_mode) != stat.S_IFMT(file2_stat.st_mode):
+               differences.append("type")
+       if "mode" not in skipped_types and stat.S_IMODE(file1_stat.st_mode) != stat.S_IMODE(file2_stat.st_mode):
+               differences.append("mode")
+       if "owner" not in skipped_types and file1_stat.st_uid != file2_stat.st_uid:
+               differences.append("owner")
+       if "group" not in skipped_types and file1_stat.st_gid != file2_stat.st_gid:
+               differences.append("group")
+       if "device_number" not in skipped_types and file1_stat.st_rdev != file2_stat.st_rdev:
+               differences.append("device_number")
+
+       if "xattr" not in skipped_types and sorted(xattr.get_all(file1, nofollow=True)) != sorted(xattr.get_all(file2, nofollow=True)):
+               differences.append("xattr")
+
+       # The nanosecond-resolution st_*_ns fields exist since Python 3.3;
+       # older interpreters fall back to the float timestamps.
+       if sys.hexversion >= 0x3030000:
+               if "atime" not in skipped_types and file1_stat.st_atime_ns != file2_stat.st_atime_ns:
+                       differences.append("atime")
+               if "mtime" not in skipped_types and file1_stat.st_mtime_ns != file2_stat.st_mtime_ns:
+                       differences.append("mtime")
+               if "ctime" not in skipped_types and file1_stat.st_ctime_ns != file2_stat.st_ctime_ns:
+                       differences.append("ctime")
+       else:
+               if "atime" not in skipped_types and file1_stat.st_atime != file2_stat.st_atime:
+                       differences.append("atime")
+               if "mtime" not in skipped_types and file1_stat.st_mtime != file2_stat.st_mtime:
+                       differences.append("mtime")
+               if "ctime" not in skipped_types and file1_stat.st_ctime != file2_stat.st_ctime:
+                       differences.append("ctime")
+
+       if "type" in differences:
+               # Size and content are not compared once a type difference
+               # has been recorded.
+               pass
+       elif file1_stat.st_size != file2_stat.st_size:
+               # Different sizes imply different content; no need to read.
+               if "size" not in skipped_types:
+                       differences.append("size")
+               if "content" not in skipped_types:
+                       differences.append("content")
+       else:
+               if "content" not in skipped_types:
+                       file1_stream = None
+                       file2_stream = None
+                       # try/finally guarantees that every stream which was
+                       # opened is closed, even if opening the second one or
+                       # a read() raises.
+                       try:
+                               if stat.S_ISLNK(file1_stat.st_mode):
+                                       file1_stream = io.BytesIO(os.readlink(file1_encoded))
+                               else:
+                                       file1_stream = open(file1_encoded, "rb")
+                               if stat.S_ISLNK(file2_stat.st_mode):
+                                       file2_stream = io.BytesIO(os.readlink(file2_encoded))
+                               else:
+                                       file2_stream = open(file2_encoded, "rb")
+                               while True:
+                                       file1_content = file1_stream.read(4096)
+                                       file2_content = file2_stream.read(4096)
+                                       if file1_content != file2_content:
+                                               differences.append("content")
+                                               break
+                                       # Both chunks are equal here, so an empty
+                                       # chunk means both streams are exhausted.
+                                       if not file1_content:
+                                               break
+                       finally:
+                               for stream in (file1_stream, file2_stream):
+                                       if stream is not None:
+                                               stream.close()
+
+       return tuple(differences)

Reply via email to