commit: 81998be2e521a5e9ea4922cc757af081bb354c2a
Author: Brian Harring <ferringb <AT> gmail <DOT> com>
AuthorDate: Sat Nov 29 22:08:06 2025 +0000
Commit: Brian Harring <ferringb <AT> gmail <DOT> com>
CommitDate: Sun Nov 30 01:18:24 2025 +0000
URL:
https://gitweb.gentoo.org/proj/pkgcore/snakeoil.git/commit/?id=81998be2
feat: python -m snakeoil.tools.find_unused_exports <package> <consumers...>
When invoked, it'll try to identify what is exported via __all__ but
none of the consumers actually import.
This is an ast implementation and thus limited: it'll never see through the fully
dynamic runtime of python. The ast visitation class is also most likely
incomplete. Use this as a hint of things to look into.
Signed-off-by: Brian Harring <ferringb <AT> gmail.com>
src/snakeoil/tools/__init__.py | 0
src/snakeoil/tools/find_unused_exports.py | 187 ++++++++++++++++++++++++++++++
tests/test_code_quality.py | 1 +
3 files changed, 188 insertions(+)
diff --git a/src/snakeoil/tools/__init__.py b/src/snakeoil/tools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/snakeoil/tools/find_unused_exports.py
b/src/snakeoil/tools/find_unused_exports.py
new file mode 100644
index 0000000..36addae
--- /dev/null
+++ b/src/snakeoil/tools/find_unused_exports.py
@@ -0,0 +1,187 @@
+"""Identify names exported via ``__all__`` that none of the consumers import.
+
+If you're having to work on this, python -m ast <path-to-a-file> is your best friend.
+"""
+
+# ``main`` is the only name this module exports.
+__all__ = ("main",)
+
+
+import argparse
+import ast
+import logging
+import sys
+from pathlib import Path
+from textwrap import dedent
+from typing import Self, cast
+
+from snakeoil.python_namespaces import get_submodules_of
+
+# Generally a hard requirement: avoid relying on snakeoil here. At some point
+# this should be able to be pointed right back at snakeoil for finding
+# components internally that are unused.
+# NOTE(review): the get_submodules_of import above already relies on snakeoil;
+# confirm whether that is an accepted exception to this rule.
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+
+# These classes are effectively a tree that can be walked backwards as
+# we recurse into the import pathways where they reference back down the pathways.
+# It is cyclic as all hell.
+class ModuleImport(ast.NodeVisitor, dict[str, "ModuleImport"]):
+    """One node in a tree keyed by dotted module-name components.
+
+    The node itself is a dict mapping a child component name to the child
+    node; children that do not exist yet are created on first lookup (see
+    ``__missing__``).  Every node holds a reference to the tree root and to
+    its parent, so the structure is deliberately cyclic.
+    """
+
+    __slots__ = (
+        "root",
+        "parent",
+        "name",
+        "accesses",
+        "unscoped_access",
+        "ctx_imports",
+    )
+
+    def __init__(self, root: Self | None, parent: Self | None, name: str) -> None:
+        """Create a node.
+
+        :param root: the tree root, or ``None`` to make this node the root.
+        :param parent: the parent node, or ``None`` to use the root as parent.
+        :param name: this node's own (undotted) component name.
+        """
+        # A leftover debugging hook that dropped into pdb for one hard-coded
+        # module name used to live here; it has been removed.
+        self.root = self if root is None else root  # oh yeah, cyclic baby.
+        self.parent = self.root if parent is None else parent
+        self.name = name
+        # Attribute / from-import names recorded against this module.
+        self.accesses: set[str] = set()
+        # Names of modules that did a plain ``import <this module>``.
+        self.unscoped_access: set[str] = set()
+        # Local binding name -> node it refers to, for this module's imports.
+        self.ctx_imports = dict[str, Self]()
+
+    @property
+    def qualname(self):
+        """Dotted path from (but not including) the root down to this node."""
+        components = []
+        node = self
+        while node is not self.root:
+            components.append(node.name)
+            node = node.parent
+        return ".".join(reversed(components))
+
+    def __missing__(self, name: str) -> "ModuleImport":
+        """Create, store and return the child node for ``name`` on first access."""
+        assert "." not in name
+        self[name] = obj = self.__class__(self.root, parent=self, name=name)
+        return obj
+
+    def resolve_import(self, name: str) -> "ModuleImport":
+        """Return the node for a dotted import path, creating nodes as needed.
+
+        A path without leading dots is resolved from the root.  Each leading
+        dot climbs one level, starting from this node.
+
+        :raises Exception: if the leading dots would climb past the root.
+        """
+        parts = name.split(".")
+
+        # A leading empty component means the name started with a dot.
+        current = self if parts[0] == "" else self.root
+        while parts and parts[0] == "":
+            if current is self.root:
+                raise Exception(
+                    f"in {self.qualname}, an import tried to climb past root: {name}"
+                )
+            current = current.parent
+            parts = parts[1:]
+        for part in parts:
+            current = current[part]
+        return current
+
+    def __str__(self) -> str:
+        return f"{self.qualname}: access={self.accesses!r} unscoped={self.unscoped_access!r} known ctx={list(sorted(self.ctx_imports.keys()))!r}"
+
+    def __repr__(self):
+        return str(self)
+
+
+class ImportCollector(ast.NodeVisitor):
+    """AST visitor that records a module's imports and attribute accesses.
+
+    Results are written onto the ``ModuleImport`` tree: the imported module's
+    node collects the accessed names, and the visiting module's node collects
+    the local binding names in ``ctx_imports``.
+    """
+
+    __slotting_intentionally_disabled__ = True
+
+    def __init__(self, root: ModuleImport, name: str) -> None:
+        """
+        :param root: root of the ``ModuleImport`` tree to record into.
+        :param name: dotted name resolved to the node that imports are
+            recorded against (``self.current``).
+        """
+        self.root = root
+        self.current = self.root.resolve_import(name)
+
+    def visit_Import(self, node):
+        """Record ``import x`` / ``import x as y`` statements."""
+        for alias in node.names:
+            # rework this to look for getattrs
+
+            result = self.current.resolve_import(alias.name)
+            result.unscoped_access.add(self.current.name)
+            # NOTE(review): for a dotted ``import a.b`` without ``as`` the key
+            # is "a.b", which visit_Attribute can never match against a bare
+            # name; behaviour left as-is.
+            self.current.ctx_imports[alias.asname if alias.asname else alias.name] = (
+                result
+            )
+
+    def visit_ImportFrom(self, node):
+        """Record ``from x import y`` statements against module ``x``."""
+        # NOTE(review): ``node.level`` (the leading dots of a relative import)
+        # is ignored here, so ``from .x import y`` is resolved as if it were
+        # absolute.  Confirm whether relative imports should be supported.
+        if node.module is None:
+            return  # not touching that with a 20ft pole.
+        for alias in node.names:
+            result = self.current.resolve_import(node.module)
+            result.accesses.add(alias.name)
+            self.current.ctx_imports[alias.asname if alias.asname else alias.name] = (
+                result
+            )
+
+    def visit_Attribute(self, node):
+        """Record ``<imported name>.<attr>`` as an access on the imported module."""
+        # this isn't confirming there isn't shadowing-
+        #   import os
+        #   def foon(os): ...  # just got shadowed, 'os' in that ctx is not globals()['os']
+        # it takes effort, and it's not worth it; this tool is already known loose.
+        if hasattr(node.value, "id"):
+            target = self.current.ctx_imports.get(node.value.id, None)
+            if target is not None:
+                target.accesses.add(node.attr)
+        # Always descend.  Without this, anything nested inside the attribute
+        # expression is never scanned: ``mod.attr.sub`` or ``f(mod.attr).x``
+        # would not record ``attr`` at all.
+        self.generic_visit(node)
+
+
+# Command line interface: one source package plus one or more target
+# namespaces.  The program name is the last dotted component of __name__.
+# NOTE(review): no formatter_class is passed, so argparse's default formatter
+# will presumably re-flow the paragraphs of the dedent()'d description below
+# - confirm whether RawDescriptionHelpFormatter was intended.
+parser = argparse.ArgumentParser(
+ __name__.rsplit(".", 1)[-1],
+ description=dedent(
+ """\
+ Tool for finding potentially dead code
+
+ This imports all modules of the source namespace, then scans the target
+ namespaces actual imports to find identify if a member of the sources
__all__ is
+ actually used somewhere in the targets. It specifically knows how to
'see' through
+ snakeoil mechanisms to thunk an import- a lazy import.
+
+ It is not authorative; code doing imports within a function it isn't
written to 'see'.
+ Consider this tooling as a way to get suggestions of what is dead code
from the
+ standpoint of nothing in the target namespaces holds a reference to
the object, thus
+ either they do dynamic imports or getattrs- which we can't see- during
code execution-
+ or it's not in use.
+ """
+ ),
+)
+# Positional: the single package whose __all__ members are being checked.
+parser.add_argument(
+ "source",
+ action="store",
+ type=str,
+ help="the python module to import and scan recursively, using __all__ to
find things only used within that codebase.",
+)
+# Positional, one or more: the namespaces searched for uses of the source.
+parser.add_argument(
+ "targets", type=str, nargs="+", help="python namespaces to scan for usage."
+)
+
+
+def main(options, out, err) -> int:
+    """Report ``__all__`` members of the source package that nothing scanned uses.
+
+    Every module of each target (and of the source itself) is parsed with
+    ``ast`` and fed through ``ImportCollector``.  Then each source module's
+    ``__all__`` is compared against the names recorded as accessed.
+
+    :param options: parsed arguments providing ``source`` (str) and
+        ``targets`` (sequence of str).
+    :param out: writable text stream that receives the report.
+    :param err: accepted for the calling convention; not used here.
+    :return: always ``0``.
+    """
+    # Function-scope import so this block does not depend on a file-level change.
+    from importlib import import_module
+
+    root = ModuleImport(None, None, "")
+    for target in tuple(options.targets) + (options.source,):
+        # import_module returns the named module itself; __import__("a.b")
+        # would hand back the top-level package "a" instead.
+        for mod in get_submodules_of(import_module(target), include_root=True):
+            path = mod.__file__
+            if path is None:
+                # Nothing on disk to parse (e.g. a namespace package).
+                continue
+            # Hand ast.parse the raw bytes so source encoding declarations are
+            # honoured instead of decoding with the locale's encoding.
+            tree = ast.parse(Path(path).read_bytes(), str(path))
+            # Record imports against the module being parsed, not against the
+            # top-level target, so each module keeps its own import context.
+            ImportCollector(root, mod.__name__).visit(tree)
+
+    source_modules = list(get_submodules_of(import_module(options.source)))
+    results = []
+    for mod in source_modules:
+        results.append(result := [mod.__name__])
+        if (mod_alls := getattr(mod, "__all__", None)) is None:
+            result.append(f"{mod.__name__} has no __all__. Not analyzing")
+            continue
+        collected = root.resolve_import(mod.__name__)
+        missing = sorted(set(mod_alls).difference(collected.accesses))
+        if not missing:
+            continue
+        result.append(f"all is {sorted(mod_alls)}")
+        if collected.unscoped_access:
+            result.append(
+                f"unscoped access exists from {collected.unscoped_access!r}. getattr() type isn't detectable current, results may be wrong"
+            )
+
+        result.append(f"possibly unused {missing}")
+
+    # Blocks holding only the module name have nothing to report; the rest
+    # are printed sorted by module name, separated by a blank line.
+    first = ""
+    for block in sorted(results, key=lambda l: l[0]):
+        if len(block) == 1:
+            continue
+        out.write(f"{first}{block[0]}\n")
+        first = "\n"
+        for lines in block[1:]:
+            out.write(f" {lines}\n")
+
+    return 0
+
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line, run the report against the
+    # standard streams and exit with main()'s return value.
+    options = parser.parse_args()
+    exit_status = main(options, sys.stdout, sys.stderr)
+    sys.exit(exit_status)
diff --git a/tests/test_code_quality.py b/tests/test_code_quality.py
index cf34604..f418071 100644
--- a/tests/test_code_quality.py
+++ b/tests/test_code_quality.py
@@ -20,6 +20,7 @@ class TestSlots(code_quality.Slots):
"snakeoil.stringio",
"snakeoil.tar",
"snakeoil.test",
+ "snakeoil.tools", # this is CLI stuff which a lot of it intentionally
avoids snakeoil internals
)
ignored_subclasses = (Exception,)
strict = True