Hi Bruno,

Previously I suggested using defaultdict to remove the base
initialization case for dictionaries. But you noted a mistake I made
in my patch [1]:

> It's a nice Python class. But the use of defaultdict(list) makes only
> the two lines
> 
>             if str(module) not in self.dependers:
>                 self.dependers[str(module)] = []
> 
> redundant. It does not make the line
> 
>             if str(parent) not in self.dependers[str(module)]:
> 
> redundant. self.dependers[str(module)] should not contain duplicates.

Patch 0001 does the same but uses a set instead of checking for
membership in a list. The sets in self.dependers[str(module)] seems
relatively small atleast with ./test-emacs-1.sh so there won't be a
noticeable difference. But I think it is more clear.

Also, am I missing something or are the sets/lists at
self.dependers[str(module)] unused? Maybe it is due to translating
this section of code from associative arrays in bash/other shells? I'm
not very familiar so that is just a guess on my part.

Patch 0002 changes the str(module) inserted into lists in that section
of code to directly use the GLModule object. For accessing the
conditionals dictionaries we can use a tuple:

    (parent, module)

instead of a string like gnulib-tool.sh:

    '%s---%s' % (str(parent), str(module))

This makes this part of the code use GLModule's __hash__ method for
lookups instead of a string. A tuple containing all hashable elements
is also hashable so we can use that as the key.

Currently since GLModule.__hash__ only uses the module name, this has
the same meaning as the previous code. The benefit is that if in a
decade Gnulib wants more complex dependencies, you no longer have to
figure out how to fit it in a string. The only downside I can think of
is you can no longer grep for "---" to find the same section of code
in gnulib-tool.sh.

Also the str(module) sort of makes things harder to read. It is mostly
my fault that we have three ways to get a GLModule objects name:

    str(module)
    module.name
    module.getName()

which are all used in various places...

[1] https://lists.gnu.org/archive/html/bug-gnulib/2024-04/msg00112.html

Collin
From 71ddeb2f8bee6788f83d7c78acf9bd417ddc2517 Mon Sep 17 00:00:00 2001
From: Collin Funk <collin.fu...@gmail.com>
Date: Fri, 19 Apr 2024 11:32:44 -0700
Subject: [PATCH 1/2] gnulib-tool.py: Simplify data structures for
 dependencies.

* pygnulib/GLModuleSystem.py (GLModuleTable.__init__): Use a defaultdict
for dependers to remove the base initialization case.
(GLModuleTable.addConditional): Use a set to disallow duplicates instead
of performing list lookups.
---
 ChangeLog                  | 8 ++++++++
 pygnulib/GLModuleSystem.py | 8 +++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index e84856cc05..d907a52c53 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2024-04-19  Collin Funk  <collin.fu...@gmail.com>
+
+	gnulib-tool.py: Simplify data structures for dependencies.
+	* pygnulib/GLModuleSystem.py (GLModuleTable.__init__): Use a defaultdict
+	for dependers to remove the base initialization case.
+	(GLModuleTable.addConditional): Use a set to disallow duplicates instead
+	of performing list lookups.
+
 2024-04-19  Bruno Haible  <br...@clisp.org>
 
 	gnulib-tool.py: Simplify running some commands in a given directory.
diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py
index 112a6bc0e6..ce1fad110c 100644
--- a/pygnulib/GLModuleSystem.py
+++ b/pygnulib/GLModuleSystem.py
@@ -23,6 +23,7 @@
 import sys
 import hashlib
 import subprocess as sp
+from collections import defaultdict
 from . import constants
 from .GLError import GLError
 from .GLConfig import GLConfig
@@ -721,7 +722,7 @@ def __init__(self, config: GLConfig, inc_all_direct_tests: bool, inc_all_indirec
           returns the condition when B should be enabled as a dependency of A,
           once the m4 code for A has been executed.
         '''
-        self.dependers = dict()
+        self.dependers = defaultdict(set)
         self.conditionals = dict()
         self.unconditionals = set()
         self.base_modules = []
@@ -763,10 +764,7 @@ def addConditional(self, parent: GLModule, module: GLModule, condition: str | bo
                             % type(condition).__name__)
         if str(module) not in self.unconditionals:
             # No unconditional dependency to the given module is known at this point.
-            if str(module) not in self.dependers:
-                self.dependers[str(module)] = []
-            if str(parent) not in self.dependers[str(module)]:
-                self.dependers[str(module)].append(str(parent))
+            self.dependers[str(module)].add(str(parent))
             key = '%s---%s' % (str(parent), str(module))
             self.conditionals[key] = condition
 
-- 
2.44.0

From bd54b3bca3f825fbac2be9a3ecd915ac2c2908ba Mon Sep 17 00:00:00 2001
From: Collin Funk <collin.fu...@gmail.com>
Date: Fri, 19 Apr 2024 12:00:33 -0700
Subject: [PATCH 2/2] gnulib-tool.py: Make use of GLModule's __hash__ method.

* pygnulib/GLModuleSystem.py (GLModuleTable.addUnconditional)
(GLModuleTable.isConditional): Use the GLModule object instead of the
module name directly.
(GLModuleTable.addConditional, GLModuleTable.getCondition): Likewise.
Use a tuple of two GLModule objects as a key for the conditionals
dictionary.
---
 ChangeLog                  | 10 ++++++++++
 pygnulib/GLModuleSystem.py | 18 ++++++++----------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index d907a52c53..65770c9c33 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2024-04-19  Collin Funk  <collin.fu...@gmail.com>
+
+	gnulib-tool.py: Make use of GLModule's __hash__ method.
+	* pygnulib/GLModuleSystem.py (GLModuleTable.addUnconditional)
+	(GLModuleTable.isConditional): Use the GLModule object instead of the
+	module name directly.
+	(GLModuleTable.addConditional, GLModuleTable.getCondition): Likewise.
+	Use a tuple of two GLModule objects as a key for the conditionals
+	dictionary.
+
 2024-04-19  Collin Funk  <collin.fu...@gmail.com>
 
 	gnulib-tool.py: Simplify data structures for dependencies.
diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py
index ce1fad110c..6d5e874375 100644
--- a/pygnulib/GLModuleSystem.py
+++ b/pygnulib/GLModuleSystem.py
@@ -762,27 +762,26 @@ def addConditional(self, parent: GLModule, module: GLModule, condition: str | bo
         if not (type(condition) is str or condition == True):
             raise TypeError('condition must be a string or True, not %s'
                             % type(condition).__name__)
-        if str(module) not in self.unconditionals:
+        if module not in self.unconditionals:
             # No unconditional dependency to the given module is known at this point.
-            self.dependers[str(module)].add(str(parent))
-            key = '%s---%s' % (str(parent), str(module))
-            self.conditionals[key] = condition
+            self.dependers[module].add(parent)
+            self.conditionals[(parent, module)] = condition
 
     def addUnconditional(self, module: GLModule) -> None:
         '''Add module as unconditional dependency.'''
         if type(module) is not GLModule:
             raise TypeError('module must be a GLModule, not %s'
                             % type(module).__name__)
-        self.unconditionals.add(str(module))
-        if str(module) in self.dependers:
-            self.dependers.pop(str(module))
+        self.unconditionals.add(module)
+        if module in self.dependers:
+            self.dependers.pop(module)
 
     def isConditional(self, module: GLModule) -> bool:
         '''Check whether module is unconditional.'''
         if type(module) is not GLModule:
             raise TypeError('module must be a GLModule, not %s'
                             % type(module).__name__)
-        result = str(module) in self.dependers
+        result = module in self.dependers
         return result
 
     def getCondition(self, parent: GLModule, module: GLModule) -> str | bool | None:
@@ -794,8 +793,7 @@ def getCondition(self, parent: GLModule, module: GLModule) -> str | bool | None:
         if type(module) is not GLModule:
             raise TypeError('module must be a GLModule, not %s'
                             % type(module).__name__)
-        key = '%s---%s' % (str(parent), str(module))
-        result = self.conditionals.get(key, None)
+        result = self.conditionals.get((parent, module), None)
         return result
 
     def transitive_closure(self, modules: list[GLModule]) -> list[GLModule]:
-- 
2.44.0

Reply via email to