commit:     eb2674c1c2d84b2c9e9923e1c1666cb7a596c36c
Author:     Zac Medico <zmedico <AT> gentoo <DOT> org>
AuthorDate: Sat Jan 19 08:39:47 2019 +0000
Commit:     Zac Medico <zmedico <AT> gentoo <DOT> org>
CommitDate: Sun Jan 20 22:57:28 2019 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=eb2674c1

INSTALL_MASK: index patterns anchored with leading slash (bug 675826)

For scalability, use a tree structure to index patterns that are
anchored with a leading slash.

Patterns anchored with leading slash are indexed by leading non-glob
components, making it possible to minimize the number of fnmatch
calls. For example:

  /foo*/bar -> {'.': ['/foo*/bar']}

  /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}

  /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}

Bug: https://bugs.gentoo.org/675826
Signed-off-by: Zac Medico <zmedico <AT> gentoo.org>

 lib/portage/tests/util/test_install_mask.py | 36 +++++++++++++
 lib/portage/util/install_mask.py            | 84 ++++++++++++++++++++++++++---
 2 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/lib/portage/tests/util/test_install_mask.py 
b/lib/portage/tests/util/test_install_mask.py
index f651eb4b7..6a29db79a 100644
--- a/lib/portage/tests/util/test_install_mask.py
+++ b/lib/portage/tests/util/test_install_mask.py
@@ -119,6 +119,42 @@ class InstallMaskTestCase(TestCase):
                                        ),
                                )
                        ),
+                       (
+                               '/usr/share/locale '
+                               '-/usr/share/locale/en* '
+                               '-/usr/share/locale/kf5_all_languages '
+                               '-/usr/share/locale/locale.alias',
+                               (
+                                       (
+                                               'usr/share/locale/en',
+                                               False,
+                                       ),
+                                       (
+                                               'usr/share/locale/en_GB',
+                                               False,
+                                       ),
+                                       (
+                                               
'usr/share/locale/en/kf5_all_languages',
+                                               False,
+                                       ),
+                                       (
+                                               'usr/share/locale/locale.alias',
+                                               False,
+                                       ),
+                                       (
+                                               'usr/share/locale/es',
+                                               True,
+                                       ),
+                                       (
+                                               'usr/share/locale/fr',
+                                               True,
+                                       ),
+                                       (
+                                               'usr/share/locale',
+                                               True,
+                                       ),
+                               )
+                       ),
                )
 
                for install_mask_str, paths in cases:

diff --git a/lib/portage/util/install_mask.py b/lib/portage/util/install_mask.py
index 32627eb05..a8c0cbda5 100644
--- a/lib/portage/util/install_mask.py
+++ b/lib/portage/util/install_mask.py
@@ -3,8 +3,11 @@
 
 __all__ = ['install_mask_dir', 'InstallMask']
 
+import collections
 import errno
 import fnmatch
+import functools
+import operator
 import sys
 
 from portage import os, _unicode_decode
@@ -18,13 +21,82 @@ else:
        _unicode = unicode
 
 
+def _defaultdict_tree():
+       return collections.defaultdict(_defaultdict_tree)
+
+
+_pattern = collections.namedtuple('_pattern', (
+       'orig_index',
+       'is_inclusive',
+       'pattern',
+       'leading_slash',
+))
+
+
 class InstallMask(object):
        def __init__(self, install_mask):
                """
                @param install_mask: INSTALL_MASK value
                @type install_mask: str
                """
-               self._install_mask = install_mask.split()
+               # Patterns not anchored with leading slash
+               self._unanchored = []
+
+               # Patterns anchored with leading slash are indexed by leading
+               # non-glob components, making it possible to minimize the
+               # number of fnmatch calls. For example:
+               # /foo*/bar -> {'.': ['/foo*/bar']}
+               # /foo/bar* -> {'foo': {'.': ['/foo/bar*']}}
+               # /foo/bar/ -> {'foo': {'bar': {'.': ['/foo/bar/']}}}
+               self._anchored = _defaultdict_tree()
+               for orig_index, pattern in enumerate(install_mask.split()):
+                       # if pattern starts with -, possibly exclude this path
+                       is_inclusive = not pattern.startswith('-')
+                       if not is_inclusive:
+                               pattern = pattern[1:]
+                       pattern_obj = _pattern(orig_index, is_inclusive, 
pattern, pattern.startswith('/'))
+                       # absolute path pattern
+                       if pattern_obj.leading_slash:
+                               current_dir = self._anchored
+                               for component in list(filter(None, 
pattern.split('/'))):
+                                       if '*' in component:
+                                               break
+                                       else:
+                                               current_dir = 
current_dir[component]
+                               current_dir.setdefault('.', 
[]).append(pattern_obj)
+
+                       # filename
+                       else:
+                               self._unanchored.append(pattern_obj)
+
+       def _iter_relevant_patterns(self, path):
+               """
+               Iterate over patterns that may be relevant for the given path.
+
+               Patterns anchored with leading / are indexed by leading
+               non-glob components, making it possible to minimize the
+               number of fnmatch calls.
+               """
+               current_dir = self._anchored
+               components = list(filter(None, path.split('/')))
+               patterns = []
+               patterns.extend(current_dir.get('.', []))
+               for component in components:
+                       next_dir = current_dir.get(component, None)
+                       if next_dir is None:
+                               break
+                       current_dir = next_dir
+                       patterns.extend(current_dir.get('.', []))
+
+               if patterns:
+                       # Sort by original pattern index, since order matters 
for
+                       # non-inclusive patterns.
+                       patterns.extend(self._unanchored)
+                       if any(not pattern.is_inclusive for pattern in 
patterns):
+                               
patterns.sort(key=operator.attrgetter('orig_index'))
+                       return iter(patterns)
+
+               return iter(self._unanchored)
 
        def match(self, path):
                """
@@ -34,13 +106,11 @@ class InstallMask(object):
                @return: True if path matches INSTALL_MASK, False otherwise
                """
                ret = False
-               for pattern in self._install_mask:
-                       # if pattern starts with -, possibly exclude this path
-                       is_inclusive = not pattern.startswith('-')
-                       if not is_inclusive:
-                               pattern = pattern[1:]
+
+               for pattern_obj in self._iter_relevant_patterns(path):
+                       is_inclusive, pattern = pattern_obj.is_inclusive, 
pattern_obj.pattern
                        # absolute path pattern
-                       if pattern.startswith('/'):
+                       if pattern_obj.leading_slash:
                                # handle trailing slash for explicit directory 
match
                                if path.endswith('/'):
                                        pattern = pattern.rstrip('/') + '/'

Reply via email to