Hi Bruno, Collin Funk <collin.fu...@gmail.com> writes:
> Thanks, I reproduced your findings. I'll have a look at fixing it with your suggestions. I've applied a patch following your suggestions. I believe the slowness in --extract-dependents introduced by my previous patch was caused by the parsing of all files in modules/* done during GLModules.__init__(). Your implementation was faster for the non-recursive case, so I've added it back with very minor changes and made it selectable through an optional top_level argument. Collin
>From ac562bbfa2fd80d23bc0cb10d64745fe91d074da Mon Sep 17 00:00:00 2001 From: Collin Funk <collin.fu...@gmail.com> Date: Wed, 4 Dec 2024 18:49:45 -0800 Subject: [PATCH] gnulib-tool.py: Make --extract-dependents quick again. Suggested by Bruno Haible in: <https://lists.gnu.org/archive/html/bug-gnulib/2024-12/msg00012.html>. * pygnulib/GLModuleSystem.py (GLModule.getDependents): Add an optional argument to use the implementation before yesterdays change. * pygnulib/main.py (mode == 'extract-dependents'): Add the optional argument to call of GLModule.getDependents. --- ChangeLog | 10 +++++++ pygnulib/GLModuleSystem.py | 53 +++++++++++++++++++++++++++++++++++--- pygnulib/main.py | 2 +- 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index bc5f2b5f29..c783488d21 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2024-12-04 Collin Funk <collin.fu...@gmail.com> + + gnulib-tool.py: Make --extract-dependents quick again. + Suggested by Bruno Haible in: + <https://lists.gnu.org/archive/html/bug-gnulib/2024-12/msg00012.html>. + * pygnulib/GLModuleSystem.py (GLModule.getDependents): Add an optional + argument to use the implementation before yesterdays change. + * pygnulib/main.py (mode == 'extract-dependents'): Add the optional + argument to call of GLModule.getDependents. + 2024-12-04 Bruno Haible <br...@clisp.org> bison-i18n: Ensure BISON_LOCALEDIR_c expands to a C string. diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py index 7d68b712e0..dda1cc3693 100644 --- a/pygnulib/GLModuleSystem.py +++ b/pygnulib/GLModuleSystem.py @@ -588,11 +588,58 @@ def _getDependents(self, modules: list[GLModule] | None = None) -> list[GLModule self.cache['dependents'] = result return self.cache['dependents'] - def getDependents(self) -> list[GLModule]: + def getDependents(self, top_level: bool | None = True) -> list[GLModule]: '''Return list of dependents (a.k.a. "reverse dependencies"), as a list of GLModule objects. 
- GLConfig: localpath.''' - return self._getDependents() + GLConfig: localpath. Arguments are: + - top_level: Optional argument, to use an optimized version from main().''' + # Only use optimized version, below if called from the main(). + if not top_level: + return self._getDependents() + localpath = self.config['localpath'] + # Find a set of module candidates quickly. + # Convert the module name to a POSIX basic regex. + # Needs to handle . [ \ * ^ $. + regex = self.name.replace('\\', '\\\\').replace('[', '\\[').replace('^', '\\^') + regex = re.compile(r'([.*$])').sub(r'[\1]', regex) + line_regex = '^' + regex + # We can't add a '$' to line_regex, because that would fail to match + # lines that denote conditional dependencies. We could invoke grep + # twice, once to search for line_regex + '$' and once to search + # for line_regex + [ <TAB>] but that would be twice as slow. + # Read module candidates from gnulib root directory. + command = "find modules -type f -print | xargs -n 100 grep -l %s /dev/null | sed -e 's,^modules/,,'" % shlex.quote(line_regex) + result = sp.run(command, shell=True, cwd=DIRS['root'], capture_output=True).stdout.decode('utf-8') + if localpath != None and len(localpath) > 0: + command = "find modules -type f -print | xargs -n 100 grep -l %s /dev/null | sed -e 's,^modules/,,' -e 's,\\.diff$,,'" % shlex.quote(line_regex) + for localdir in localpath: + result += sp.run(command, shell=True, cwd=localdir, capture_output=True).stdout.decode('utf-8') + listing = [ line + for line in result.split('\n') + if line.strip() ] + # Remove modules/ prefix from each file name. + pattern = re.compile(r'^modules/') + listing = [ pattern.sub('', line) + for line in listing ] + # Filter out undesired file names. + listing = [ line + for line in listing + if self.modulesystem.file_is_module(line) ] + # ${module}-tests implicitly depends on ${module}, if both exist. 
+ if self.isNonTests(): + implicit_dependent = self.name+'-tests' + if self.modulesystem.exists(implicit_dependent): + listing.append(implicit_dependent) + candidates = sorted(set(listing)) + result = [] + for name in candidates: + module = self.modulesystem.find(name) + if module: # Ignore module candidates that don't actually exist. + if self in module.getDependenciesWithoutConditions(): + result.append(module) + self.cache['dependents'] = result + return self.cache['dependents'] + def getDependentsRecursively(self) -> set[GLModule]: '''Return a list of recursive dependents of this module, diff --git a/pygnulib/main.py b/pygnulib/main.py index f6aa9d47ea..d70b566269 100644 --- a/pygnulib/main.py +++ b/pygnulib/main.py @@ -1289,7 +1289,7 @@ def main(temp_directory: str) -> None: for name in modules: module = modulesystem.find(name) if module: - dependents = module.getDependents() + dependents = module.getDependents(top_level=True) dependents_names = sorted([ m.name for m in dependents ]) sys.stdout.write(lines_to_multiline(dependents_names)) -- 2.47.1