Source: diffoscope
Version: ada1a1dcdc19217fb611e0a1e57bc3744399aefa
Severity: wishlist
Tags: patch
It would be useful for diffoscope to output differences in omni.ja files the
same way it does for other Zip files, instead of ending up with a diff of a hexdump.
The attached patch implements minimal support for this. However, it doesn't
look at the difference in the `preload` value.
-- System Information:
Debian Release: stretch/sid
APT prefers unstable
APT policy: (500, 'unstable'), (1, 'experimental')
Architecture: amd64 (x86_64)
Foreign Architectures: i386
Kernel: Linux 4.2.0-1-amd64 (SMP w/4 CPU cores)
Locale: LANG=ja_JP.UTF-8, LC_CTYPE=ja_JP.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
diff --git a/diffoscope/comparators/__init__.py b/diffoscope/comparators/__init__.py
index b4615c9..b5dd320 100644
--- a/diffoscope/comparators/__init__.py
+++ b/diffoscope/comparators/__init__.py
@@ -72,7 +72,7 @@ from diffoscope.comparators.symlink import Symlink
from diffoscope.comparators.text import TextFile
from diffoscope.comparators.tar import TarFile
from diffoscope.comparators.xz import XzFile
-from diffoscope.comparators.zip import ZipFile
+from diffoscope.comparators.zip import ZipFile, MozillaZipFile
def bail_if_non_existing(*paths):
@@ -154,6 +154,7 @@ FILE_CLASSES = (
TarFile,
XzFile,
ZipFile,
+ MozillaZipFile,
ImageFile,
)
diff --git a/diffoscope/comparators/zip.py b/diffoscope/comparators/zip.py
index ecdc77b..42c9a9f 100644
--- a/diffoscope/comparators/zip.py
+++ b/diffoscope/comparators/zip.py
@@ -111,3 +111,54 @@ class ZipFile(File):
zipinfo_difference = Difference.from_command(Zipinfo, self.path, other.path) or \
Difference.from_command(ZipinfoVerbose, self.path, other.path)
return [zipinfo_difference]
+
+
+class MozillaZipCommandMixin(object):
+    """Mixin for zipinfo commands run against Mozilla-optimized ZIPs.
+
+    List it before the command class in the bases so that this wait()
+    is found first and delegates to the command's own wait().
+    """
+
+    def wait(self):
+        """Wait via the next class in the MRO, then report success.
+
+        zipinfo emits an error when reading Mozilla-optimized ZIPs, which
+        is fine to ignore, so the result of the underlying wait() is
+        discarded and 0 is returned unconditionally.
+        """
+        # Name this mixin (not Zipinfo) in super() so the call follows the
+        # MRO of whichever command class the mixin is combined with.
+        super(MozillaZipCommandMixin, self).wait()
+        return 0
+
+
+class MozillaZipinfo(MozillaZipCommandMixin, Zipinfo):
+    """Zipinfo command combined with MozillaZipCommandMixin."""
+
+
+class MozillaZipinfoVerbose(MozillaZipCommandMixin, ZipinfoVerbose):
+    """ZipinfoVerbose command combined with MozillaZipCommandMixin."""
+
+
+class MozillaZipContainer(ZipContainer):
+    """ZipContainer variant that can open Mozilla-optimized ZIPs."""
+
+    def open_archive(self):
+        """Open the archive with zipfile._EndRecData temporarily patched.
+
+        Returns the result of the parent class's open_archive(). The
+        original zipfile._EndRecData is put back before returning, and
+        also when the parent call raises.
+        """
+        # This is gross: Monkeypatch zipfile._EndRecData to work with
+        # Mozilla-optimized ZIPs
+        _orig_EndRecData = zipfile._EndRecData
+
+        def _EndRecData(fh):
+            endrec = _orig_EndRecData(fh)
+            if endrec:
+                # Overwrite the recorded location with the central
+                # directory's offset plus its size.
+                endrec[zipfile._ECD_LOCATION] = (endrec[zipfile._ECD_OFFSET] +
+                                                 endrec[zipfile._ECD_SIZE])
+            return endrec
+
+        zipfile._EndRecData = _EndRecData
+        try:
+            return super(MozillaZipContainer, self).open_archive()
+        finally:
+            # The patch is process-wide: always undo it, even on failure,
+            # so other zipfile users never see the replacement function.
+            zipfile._EndRecData = _orig_EndRecData
+
+
+class MozillaZipFile(File):
+    """File type for Mozilla-optimized ZIPs (e.g. omni.ja)."""
+
+    CONTAINER_CLASS = MozillaZipContainer
+
+    @staticmethod
+    def recognizes(file):
+        """Return True if `file` starts like a Mozilla-optimized ZIP.
+
+        Only the first 8 bytes of `file.path` are read. The result is
+        always a bool; files too short to hold the header yield False.
+        """
+        # Mozilla-optimized ZIPs start with a 32-bit little endian integer
+        # indicating the amount of data to preload, followed by the ZIP
+        # central directory (with a PK\x01\x02 signature)
+        with open(file.path, 'rb') as f:
+            header = f.read(8)
+        # Bytes 0-3 are the preload value (not inspected here); a short
+        # read leaves a short slice, which simply compares unequal.
+        return header[4:8] == b'PK\x01\x02'
+
+    def compare_details(self, other, source=None):
+        """Return a one-element list holding the zipinfo difference.
+
+        The MozillaZipinfo output is compared first; the verbose variant
+        is only run when that first comparison gives a falsy result.
+        """
+        zipinfo_difference = Difference.from_command(MozillaZipinfo, self.path, other.path) or \
+                             Difference.from_command(MozillaZipinfoVerbose, self.path, other.path)
+        return [zipinfo_difference]