commit: 6b6b25b5a046d660cb316cb973f69abfb9ff9259 Author: Andrei Horodniceanu <a.horodniceanu <AT> proton <DOT> me> AuthorDate: Wed Feb 19 16:00:53 2025 +0000 Commit: Arthur Zamarin <arthurzam <AT> gentoo <DOT> org> CommitDate: Sat Mar 8 11:31:44 2025 +0000 URL: https://gitweb.gentoo.org/proj/javatoolkit.git/commit/?id=6b6b25b5
cvv: Refactor and implement multi-release JAR files Added type annotations to cvv.py and class_version_verify.py. Fixed an issue in cvv.py that caused the version encoded in a .class file to be unpacked as i16 instead of u16 Fixed an issue in class_version_verify.py that caused the -f argument to trigger runtime errors. The format of the output is the same as the previous code, so long as it correctly diagnosed an issue. Closes: https://bugs.gentoo.org/900767 Signed-off-by: Andrei Horodniceanu <a.horodniceanu <AT> proton.me> Approved-by: Volkmar W. Pogatzki <gentoo <AT> pogatzki.net> Signed-off-by: Arthur Zamarin <arthurzam <AT> gentoo.org> NEWS | 1 + src/javatoolkit/cvv.py | 235 +++++++++++++++++++----- src/javatoolkit/scripts/class_version_verify.py | 80 ++++++-- 3 files changed, 259 insertions(+), 57 deletions(-) diff --git a/NEWS b/NEWS index a1ff448..1c21ec8 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,5 @@ 0.6.8 (???) +- Support Multi-Release jars, bug #900767 - Migrate build-system to flit with pyproject.toml 0.6.7 (19 Jun 2021) diff --git a/src/javatoolkit/cvv.py b/src/javatoolkit/cvv.py index 2b4eec6..ac92163 100644 --- a/src/javatoolkit/cvv.py +++ b/src/javatoolkit/cvv.py @@ -2,60 +2,213 @@ # Copyright 1999-2008 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -import os -import sys +from dataclasses import dataclass from struct import unpack from zipfile import ZipFile +import os +import re +import typing as T + + +@dataclass(frozen=True) +class FileLoc: + path: str + + +@dataclass(frozen=True) +class JarLoc: + '''A file inside a jar archive''' + jar: FileLoc + member: str + + +Loc = FileLoc | JarLoc + + +@dataclass +class ClassFile: + loc: Loc + encoded_version: str + expected_version: str + + +@dataclass +class BadMultireleaseManifest: + '''A multi-release jar but without `Multi-Release: true` in MANIFEST.MF''' + loc: JarLoc + multiReleaseDirs: list[JarLoc] + + +@dataclass +class SkippedVersionDir: + loc: JarLoc + reason: 
str + + +@dataclass +class SkippedModuleInfo(ClassFile): + reason: str = 'A module-info requires java release >= 9' + + +GoodFile = ClassFile +BadFile = ClassFile | BadMultireleaseManifest +SkippedFile = SkippedVersionDir | SkippedModuleInfo class CVVMagic: - def __init__(self, target): + def __init__(self, target: str) -> None: # this is a number 8 9 10 11 etc, not including 1. if '.' in target: self.target = int(target.split(".")[-1]) else: self.target = int(target) - self.good = [] - self.bad = [] - self.skipped = [] - - def add(self, version, jar, file): - if file == "module-info.class" and self.target < 9: - self.skipped.append((version, jar, file)) - elif version <= self.target: - if version < 9: - self.good.append(("1.%s" % (version), jar, file)) - else: - self.good.append((version, jar, file)) + self.good: list[GoodFile] = [] + self.bad: list[BadFile] = [] + self.skipped: list[SkippedFile] = [] + + def add(self, version: int, loc: Loc, target_version: T.Optional[int] = None) -> None: + if target_version is None: + target_version = self.target + + cf = ClassFile( + loc, + encoded_version=self.__format_version(version), + expected_version=self.__format_version(target_version)) + + if CVVMagic.__is_module_info(loc) and target_version < 9: + self.__on_skipped(SkippedModuleInfo( + cf.loc, cf.encoded_version, cf.expected_version)) + return + + if version <= target_version: + self.__on_good(cf) else: - if version < 9: - self.bad.append(("1.%s" % (version), jar, file)) - else: - self.bad.append((version, jar, file)) - - def do_class(self,filename): - classFile = open(filename,"rb") - classFile.seek(4) - temp = classFile.read(4) - (version,) = unpack('>xxh',temp) - version -= 44 - self.add(version, None, filename) - - def do_jar(self, filename): - zipfile = ZipFile(filename, 'r') - - for file in zipfile.namelist(): - if file.endswith('class'): - classFile = zipfile.read(file) - (version,) = unpack('>h',classFile[6:8]) - version -= 44 - self.add(version, filename, 
file) - - def do_file(self, filename): + self.__on_bad(cf) + + def do_class(self, class_file: T.IO[bytes], filename: FileLoc) -> None: + version = self.__extract_version(class_file) + self.add(version, filename) + + def do_jar(self, jar: ZipFile, jar_path: FileLoc) -> None: + def jar_loc(path: str) -> JarLoc: + return JarLoc(jar_path, path) + + is_multirelease = False + try: + manifest = jar.open('META-INF/MANIFEST.MF', 'r') + except KeyError: + pass + else: + with manifest: + lines = [line.decode('utf-8').rstrip() for line in manifest.readlines()] + is_multirelease = 'Multi-Release: true' in lines + + invalid_version_dirs: set[str] = set() + seen_skipped_dirs: set[str] = set() + for path in jar.namelist(): + if not path.endswith('class'): + continue + + loc = jar_loc(path) + + with jar.open(path, 'r') as class_file: + target_version = None + match self.__get_multirelease_target_version(path): + case int(tv): + if is_multirelease: + target_version = tv + else: + version_dir = path.split('/', 3)[:3] + invalid_version_dirs.add('/'.join(version_dir)) + continue + case (ver_dir, reason): + if ver_dir not in seen_skipped_dirs: + seen_skipped_dirs.add(ver_dir) + self.__on_skipped(SkippedVersionDir( + jar_loc(ver_dir), reason)) + continue + case None: + pass + + version = self.__extract_version(class_file) + self.add(version, loc, target_version) + + if len(invalid_version_dirs): + self.__on_bad(BadMultireleaseManifest( + jar_loc('META-INF/MANIFEST.MF'), + [jar_loc(d) for d in sorted(invalid_version_dirs)])) + + def do(self, filename: str) -> None: if not os.path.islink(filename): if filename.endswith(".class"): - self.do_class(filename) + with open(filename, 'rb') as class_file: + self.do_class(class_file, FileLoc(filename)) if filename.endswith(".jar"): - self.do_jar(filename) + with ZipFile(filename, 'r') as jar: + self.do_jar(jar, FileLoc(filename)) + + @classmethod + def __extract_version(cls, file: T.IO[bytes]) -> int: + data = file.read(8) + if len(data) != 8: + 
raise ValueError(f'Need the first 8 bytes of a java .class file, got: {len(data)}') + # A .class file is encoded like (all big-endian): + # u4 - magic + # u2 - minor version + # u2 - major version + result = unpack('>4x2xH', data)[0] + return result - 44 + + @classmethod + def __get_multirelease_target_version(cls, path: str) -> int | None | tuple[str, str]: + '''Get the target version of a possible multi-release class file + + Returns: + int target_version - If the path is under META-INF/versions/${target_version} + None - If the path is not part of META-INF/versions + (directory, reason) - The directory portion of `path` that should be ignored + ''' + result = None + + parts = path.split('/', 3) + if len(parts) >= 3 and parts[:2] == ['META-INF', 'versions']: + expected_version = parts[2] + # https://docs.oracle.com/en/java/javase/23/docs/specs/jar/jar.html#multi-release-jar-files + # If the version is not a number or < 9 it is ignored + + ver_dir = '/'.join(parts[:3]) + reasonBase = f'The version directory "{expected_version}" ' + if not expected_version.isdecimal(): + return (ver_dir, reasonBase + 'is not a number') + if (result := int(expected_version)) < 9: + return (ver_dir, reasonBase + 'is less than 9') + + return result + + __module_info_jar_pattern = re.compile('(META-INF/versions/[1-9][0-9]*/)?module-info.class') + __module_info_file_pattern = re.compile('module-info.class') + + @classmethod + def __is_module_info(cls, filepath: Loc) -> bool: + match filepath: + case FileLoc(target): + ptn = cls.__module_info_file_pattern + case JarLoc(member=target): + ptn = cls.__module_info_jar_pattern + + return ptn.fullmatch(target) is not None + + @classmethod + def __format_version(cls, version: int) -> str: + return f'1.{version}' if version < 9 else f'{version}' + + def __on_good(self, goodFile: GoodFile) -> None: + self.good.append(goodFile) + + def __on_bad(self, badFile: BadFile) -> None: + self.bad.append(badFile) + + def __on_skipped(self, skippedFile: 
SkippedFile) -> None: + self.skipped.append(skippedFile) # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 nowrap: diff --git a/src/javatoolkit/scripts/class_version_verify.py b/src/javatoolkit/scripts/class_version_verify.py index 1394146..fd544d3 100755 --- a/src/javatoolkit/scripts/class_version_verify.py +++ b/src/javatoolkit/scripts/class_version_verify.py @@ -6,10 +6,10 @@ import os import sys from optparse import OptionParser, make_option -from ..cvv import CVVMagic +from .. import cvv -def main(): +def main() -> None: options_list = [ make_option( "-r", @@ -30,7 +30,7 @@ def main(): action="store_true", dest="verbose", default=False, - help="Print version of every class"), + help="Print details about analyzed files"), make_option( "-s", "--silent", @@ -56,35 +56,38 @@ def main(): print("-t is mandatory") sys.exit(2) - cvv_magic = CVVMagic(options.version) + cvv_magic = cvv.CVVMagic(options.version) for arg in args: if os.path.isfile(arg): - cvv_magic.do_file(arg) + cvv_magic.do(arg) if options.deep and os.path.isdir(arg): for root, dirs, files in os.walk(arg): for filename in files: - cvv_magic.do_file("%s/%s" % (root, filename)) + cvv_magic.do("%s/%s" % (root, filename)) if options.file_only: - lst = set([set[1] for set in cvv_magic.bad]) + lst = set() + for info in cvv_magic.bad: + match info.loc: + case cvv.FileLoc(path) | cvv.JarLoc(cvv.FileLoc(path), _): + lst.add(path) for i in lst: print(i) else: if options.verbose: - for set in cvv_magic.good: - print("Good: %s %s %s" % set) + for good in cvv_magic.good: + print(__format_good(good)) if not options.silent: - for set in cvv_magic.bad: - print("Bad: %s %s %s" % set) - for set in cvv_magic.skipped: - print("Skipped: %s %s %s" % set) + for bad in cvv_magic.bad: + print(__format_bad(bad)) + for skipped in cvv_magic.skipped: + print(__format_skipped(skipped)) - print("CVV: %s\nChecked: %i Good: %i Bad: %i Skipped: %i" % - (options.version, len(cvv_magic.good) + len(cvv_magic.bad) + 
len(cvv_magic.skipped), - len(cvv_magic.good), len(cvv_magic.bad), len(cvv_magic.skipped))) + print(f'CVV: {options.version}') + print(__get_total_line(cvv_magic)) if len(cvv_magic.bad) > 0: sys.exit(1) @@ -92,5 +95,50 @@ def main(): sys.exit(0) +def __get_total_line(cvv_magic: cvv.CVVMagic) -> str: + good = len(cvv_magic.good) + bad = len(cvv_magic.bad) + skipped = len(cvv_magic.skipped) + total = good + bad + skipped + return f'Checked: {total} Good: {good} Bad: {bad} Skipped: {skipped}' + + +def __format_class(class_file: cvv.ClassFile) -> str: + return f'{class_file.encoded_version} {__format_loc(class_file.loc)}' + + +def __format_skipped(f: cvv.SkippedFile) -> str: + msg: str + match f: + case cvv.SkippedModuleInfo() as cf: + msg = __format_class(cf) + case cvv.SkippedVersionDir(loc, reason): + msg = f'{__format_loc(loc)} because "{reason}"' + return f'Skipped: {msg}' + + +def __format_bad(f: cvv.BadFile) -> str: + msg: str + match f: + case cvv.ClassFile(): + msg = f'{__format_class(f)}' + case cvv.BadMultireleaseManifest(loc, multiReleaseDirs): + plain_dirs = [d.member for d in multiReleaseDirs] + msg = f'{__format_loc(loc)} missing "Multi-Release: true" for {plain_dirs}' + return f'Bad: {msg}' + + +def __format_good(f: cvv.GoodFile) -> str: + return f'Good: {__format_class(f)}' + + +def __format_loc(loc: cvv.Loc) -> str: + match loc: + case cvv.FileLoc(path): + return f'None {path}' + case cvv.JarLoc(jar, member): + return f'{jar.path} {member}' + + if __name__ == '__main__': main()
