commit:     6b6b25b5a046d660cb316cb973f69abfb9ff9259
Author:     Andrei Horodniceanu <a.horodniceanu <AT> proton <DOT> me>
AuthorDate: Wed Feb 19 16:00:53 2025 +0000
Commit:     Arthur Zamarin <arthurzam <AT> gentoo <DOT> org>
CommitDate: Sat Mar  8 11:31:44 2025 +0000
URL:        https://gitweb.gentoo.org/proj/javatoolkit.git/commit/?id=6b6b25b5

cvv: Refactor and implement multi-release JAR files

Added type annotations to cvv.py and class_version_verify.py.

Fixed an issue in cvv.py that caused the version encoded in a .class
file to be unpacked as i16 instead of u16

Fixed an issue in class_version_verify.py that caused the -f argument
to trigger runtime errors.

The format of the output is the same as the previous code, so long as
it correctly diagnosed an issue.

Closes: https://bugs.gentoo.org/900767
Signed-off-by: Andrei Horodniceanu <a.horodniceanu <AT> proton.me>
Approved-by: Volkmar W. Pogatzki <gentoo <AT> pogatzki.net>
Signed-off-by: Arthur Zamarin <arthurzam <AT> gentoo.org>

 NEWS                                            |   1 +
 src/javatoolkit/cvv.py                          | 235 +++++++++++++++++++-----
 src/javatoolkit/scripts/class_version_verify.py |  80 ++++++--
 3 files changed, 259 insertions(+), 57 deletions(-)

diff --git a/NEWS b/NEWS
index a1ff448..1c21ec8 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,5 @@
 0.6.8 (???)
+- Support Multi-Release jars, bug #900767
 - Migrate build-system to flit with pyproject.toml
 
 0.6.7 (19 Jun 2021)

diff --git a/src/javatoolkit/cvv.py b/src/javatoolkit/cvv.py
index 2b4eec6..ac92163 100644
--- a/src/javatoolkit/cvv.py
+++ b/src/javatoolkit/cvv.py
@@ -2,60 +2,213 @@
 # Copyright 1999-2008 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
-import os
-import sys
+from dataclasses import dataclass
 from struct import unpack
 from zipfile import ZipFile
+import os
+import re
+import typing as T
+
+
+@dataclass(frozen=True)
+class FileLoc:
+    path: str
+
+
+@dataclass(frozen=True)
+class JarLoc:
+    '''A file inside a jar archive'''
+    jar: FileLoc
+    member: str
+
+
+Loc = FileLoc | JarLoc
+
+
+@dataclass
+class ClassFile:
+    loc: Loc
+    encoded_version: str
+    expected_version: str
+
+
+@dataclass
+class BadMultireleaseManifest:
+    '''A multi-release jar but without `Multi-Release: true` in MANIFEST.MF'''
+    loc: JarLoc
+    multiReleaseDirs: list[JarLoc]
+
+
+@dataclass
+class SkippedVersionDir:
+    loc: JarLoc
+    reason: str
+
+
+@dataclass
+class SkippedModuleInfo(ClassFile):
+    reason: str = 'A module-info requires java release >= 9'
+
+
+GoodFile = ClassFile
+BadFile = ClassFile | BadMultireleaseManifest
+SkippedFile = SkippedVersionDir | SkippedModuleInfo
 
 
 class CVVMagic:
-    def __init__(self, target):
+    def __init__(self, target: str) -> None:
         # this is a number 8 9 10 11 etc, not including 1.
         if '.' in target:
             self.target = int(target.split(".")[-1])
         else:
             self.target = int(target)
-        self.good = []
-        self.bad = []
-        self.skipped = []
-
-    def add(self, version, jar, file):
-        if file == "module-info.class" and self.target < 9:
-            self.skipped.append((version, jar, file))
-        elif version <= self.target:
-            if version < 9:
-                self.good.append(("1.%s" % (version), jar, file))
-            else:
-                self.good.append((version, jar, file))
+        self.good: list[GoodFile] = []
+        self.bad: list[BadFile] = []
+        self.skipped: list[SkippedFile] = []
+
+    def add(self, version: int, loc: Loc, target_version: T.Optional[int] = None) -> None:
+        if target_version is None:
+            target_version = self.target
+
+        cf = ClassFile(
+            loc,
+            encoded_version=self.__format_version(version),
+            expected_version=self.__format_version(target_version))
+
+        if CVVMagic.__is_module_info(loc) and target_version < 9:
+            self.__on_skipped(SkippedModuleInfo(
+                cf.loc, cf.encoded_version, cf.expected_version))
+            return
+
+        if version <= target_version:
+            self.__on_good(cf)
         else:
-            if version < 9:
-                self.bad.append(("1.%s" % (version), jar, file))
-            else:
-                self.bad.append((version, jar, file))
-
-    def do_class(self,filename):
-        classFile = open(filename,"rb")
-        classFile.seek(4)
-        temp = classFile.read(4)
-        (version,) = unpack('>xxh',temp)
-        version -= 44
-        self.add(version, None, filename)
-
-    def do_jar(self, filename):
-        zipfile = ZipFile(filename, 'r')
-
-        for file in zipfile.namelist():
-            if file.endswith('class'):
-                classFile = zipfile.read(file)
-                (version,) = unpack('>h',classFile[6:8])
-                version -= 44
-                self.add(version, filename, file)
-
-    def do_file(self, filename):
+            self.__on_bad(cf)
+
+    def do_class(self, class_file: T.IO[bytes], filename: FileLoc) -> None:
+        version = self.__extract_version(class_file)
+        self.add(version, filename)
+
+    def do_jar(self, jar: ZipFile, jar_path: FileLoc) -> None:
+        def jar_loc(path: str) -> JarLoc:
+            return JarLoc(jar_path, path)
+
+        is_multirelease = False
+        try:
+            manifest = jar.open('META-INF/MANIFEST.MF', 'r')
+        except KeyError:
+            pass
+        else:
+            with manifest:
+                lines = [line.decode('utf-8').rstrip() for line in manifest.readlines()]
+                is_multirelease = 'Multi-Release: true' in lines
+
+        invalid_version_dirs: set[str] = set()
+        seen_skipped_dirs: set[str] = set()
+        for path in jar.namelist():
+            if not path.endswith('class'):
+                continue
+
+            loc = jar_loc(path)
+
+            with jar.open(path, 'r') as class_file:
+                target_version = None
+                match self.__get_multirelease_target_version(path):
+                    case int(tv):
+                        if is_multirelease:
+                            target_version = tv
+                        else:
+                            version_dir = path.split('/', 3)[:3]
+                            invalid_version_dirs.add('/'.join(version_dir))
+                            continue
+                    case (ver_dir, reason):
+                        if ver_dir not in seen_skipped_dirs:
+                            seen_skipped_dirs.add(ver_dir)
+                            self.__on_skipped(SkippedVersionDir(
+                                jar_loc(ver_dir), reason))
+                        continue
+                    case None:
+                        pass
+
+                version = self.__extract_version(class_file)
+                self.add(version, loc, target_version)
+
+        if len(invalid_version_dirs):
+            self.__on_bad(BadMultireleaseManifest(
+                jar_loc('META-INF/MANIFEST.MF'),
+                [jar_loc(d) for d in sorted(invalid_version_dirs)]))
+
+    def do(self, filename: str) -> None:
         if not os.path.islink(filename):
             if filename.endswith(".class"):
-                self.do_class(filename)
+                with open(filename, 'rb') as class_file:
+                    self.do_class(class_file, FileLoc(filename))
             if filename.endswith(".jar"):
-                self.do_jar(filename)
+                with ZipFile(filename, 'r') as jar:
+                    self.do_jar(jar, FileLoc(filename))
+
+    @classmethod
+    def __extract_version(cls, file: T.IO[bytes]) -> int:
+        data = file.read(8)
+        if len(data) != 8:
+            raise ValueError(f'Need the first 8 bytes of a java .class file, got: {len(data)}')
+        # A .class file is encoded like (all big-endian):
+        # u4 - magic
+        # u2 - minor version
+        # u2 - major version
+        result = unpack('>4x2xH', data)[0]
+        return result - 44
+
+    @classmethod
+    def __get_multirelease_target_version(cls, path: str) -> int | None | tuple[str, str]:
+        '''Get the target verion of a possible multi-release class file
+
+        Returns:
+        int target_version - If the path is under META-INF/versions/${target_version}
+        None - If the path is not part of META-INF/versions
+        (directory, reason) - The directory portion of `path` that should be ignored
+        '''
+        result = None
+
+        parts = path.split('/', 3)
+        if len(parts) >= 3 and parts[:2] == ['META-INF', 'versions']:
+            expected_version = parts[2]
+            # https://docs.oracle.com/en/java/javase/23/docs/specs/jar/jar.html#multi-release-jar-files
+            # If the version is not a number or < 9 it is ignored
+
+            ver_dir = '/'.join(parts[:3])
+            reasonBase = f'The version directory "{expected_version}" '
+            if not expected_version.isdecimal():
+                return (ver_dir, reasonBase + 'is not a number')
+            if (result := int(expected_version)) < 9:
+                return (ver_dir, reasonBase + 'is less than 9')
+
+        return result
+
+    __module_info_jar_pattern = re.compile('(META-INF/versions/[1-9][0-9]*/)?module-info.class')
+    __module_info_file_pattern = re.compile('module-info.class')
+
+    @classmethod
+    def __is_module_info(cls, filepath: Loc) -> bool:
+        match filepath:
+            case FileLoc(target):
+                ptn = cls.__module_info_file_pattern
+            case JarLoc(member=target):
+                ptn = cls.__module_info_jar_pattern
+
+        return ptn.fullmatch(target) is not None
+
+    @classmethod
+    def __format_version(cls, version: int) -> str:
+        return f'1.{version}' if version < 9 else f'{version}'
+
+    def __on_good(self, goodFile: GoodFile) -> None:
+        self.good.append(goodFile)
+
+    def __on_bad(self, badFile: BadFile) -> None:
+        self.bad.append(badFile)
+
+    def __on_skipped(self, skippedFile: SkippedFile) -> None:
+        self.skipped.append(skippedFile)
 
 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 nowrap:

diff --git a/src/javatoolkit/scripts/class_version_verify.py b/src/javatoolkit/scripts/class_version_verify.py
index 1394146..fd544d3 100755
--- a/src/javatoolkit/scripts/class_version_verify.py
+++ b/src/javatoolkit/scripts/class_version_verify.py
@@ -6,10 +6,10 @@
 import os
 import sys
 from optparse import OptionParser, make_option
-from ..cvv import CVVMagic
+from .. import cvv
 
 
-def main():
+def main() -> None:
     options_list = [
         make_option(
             "-r",
@@ -30,7 +30,7 @@ def main():
             action="store_true",
             dest="verbose",
             default=False,
-            help="Print version of every class"),
+            help="Print details about analyzed files"),
         make_option(
             "-s",
             "--silent",
@@ -56,35 +56,38 @@ def main():
         print("-t is mandatory")
         sys.exit(2)
 
-    cvv_magic = CVVMagic(options.version)
+    cvv_magic = cvv.CVVMagic(options.version)
 
     for arg in args:
         if os.path.isfile(arg):
-            cvv_magic.do_file(arg)
+            cvv_magic.do(arg)
 
         if options.deep and os.path.isdir(arg):
             for root, dirs, files in os.walk(arg):
                 for filename in files:
-                    cvv_magic.do_file("%s/%s" % (root, filename))
+                    cvv_magic.do("%s/%s" % (root, filename))
 
     if options.file_only:
-        lst = set([set[1] for set in cvv_magic.bad])
+        lst = set()
+        for info in cvv_magic.bad:
+            match info.loc:
+                case cvv.FileLoc(path) | cvv.JarLoc(cvv.FileLoc(path), _):
+                    lst.add(path)
         for i in lst:
             print(i)
     else:
         if options.verbose:
-            for set in cvv_magic.good:
-                print("Good: %s %s %s" % set)
+            for good in cvv_magic.good:
+                print(__format_good(good))
 
         if not options.silent:
-            for set in cvv_magic.bad:
-                print("Bad: %s %s %s" % set)
-            for set in cvv_magic.skipped:
-                print("Skipped: %s %s %s" % set)
+            for bad in cvv_magic.bad:
+                print(__format_bad(bad))
+            for skipped in cvv_magic.skipped:
+                print(__format_skipped(skipped))
 
-        print("CVV: %s\nChecked: %i Good: %i Bad: %i Skipped: %i" %
-              (options.version, len(cvv_magic.good) + len(cvv_magic.bad) + len(cvv_magic.skipped),
-              len(cvv_magic.good), len(cvv_magic.bad), len(cvv_magic.skipped)))
+        print(f'CVV: {options.version}')
+        print(__get_total_line(cvv_magic))
 
     if len(cvv_magic.bad) > 0:
         sys.exit(1)
@@ -92,5 +95,50 @@ def main():
         sys.exit(0)
 
 
+def __get_total_line(cvv_magic: cvv.CVVMagic) -> str:
+    good = len(cvv_magic.good)
+    bad = len(cvv_magic.bad)
+    skipped = len(cvv_magic.skipped)
+    total = good + bad + skipped
+    return f'Checked: {total} Good: {good} Bad: {bad} Skipped: {skipped}'
+
+
+def __format_class(class_file: cvv.ClassFile) -> str:
+    return f'{class_file.encoded_version} {__format_loc(class_file.loc)}'
+
+
+def __format_skipped(f: cvv.SkippedFile) -> str:
+    msg: str
+    match f:
+        case cvv.SkippedModuleInfo() as cf:
+            msg = __format_class(cf)
+        case cvv.SkippedVersionDir(loc, reason):
+            msg = f'{__format_loc(loc)} because "{reason}"'
+    return f'Skipped: {msg}'
+
+
+def __format_bad(f: cvv.BadFile) -> str:
+    msg: str
+    match f:
+        case cvv.ClassFile():
+            msg = f'{__format_class(f)}'
+        case cvv.BadMultireleaseManifest(loc, multiReleaseDirs):
+            plain_dirs = [d.member for d in multiReleaseDirs]
+            msg = f'{__format_loc(loc)} missing "Multi-Release: true" for {plain_dirs}'
+    return f'Bad: {msg}'
+
+
+def __format_good(f: cvv.GoodFile) -> str:
+    return f'Good: {__format_class(f)}'
+
+
+def __format_loc(loc: cvv.Loc) -> str:
+    match loc:
+        case cvv.FileLoc(path):
+            return f'None {path}'
+        case cvv.JarLoc(jar, member):
+            return f'{jar.path} {member}'
+
+
 if __name__ == '__main__':
     main()

Reply via email to