Source: pyx
Version: 0.12.1-4
Severity: wishlist
Tags: patch upstream
User: reproducible-bui...@lists.alioth.debian.org
Usertags: timestamps randomness
X-Debbugs-Cc: reproducible-bui...@lists.alioth.debian.org

Dear Maintainer,

While working on the “reproducible builds” effort [1], we have noticed
that 'pyx' could not be built reproducibly.

Please find attached proposed solutions for the following issues:
* timestamps in the produced ps/pdf files can be made reproducible using
the SOURCE_DATE_EPOCH environment variable [2] (when set), which
corresponds to the date of the last debian/changelog entry when building a
Debian package. See reproducible-timestamps.patch.
* image names include memory addresses. They can, e.g., be replaced by some
hash of the image itself; see reproducible-image-name.patch.

Once applied, pyx can be built reproducibly in our current
experimental framework.

Regards,
Alexis Bienvenüe.

 [1]: https://wiki.debian.org/ReproducibleBuilds
 [2]: https://reproducible-builds.org/specs/source-date-epoch/


Description: Honour SOURCE_DATE_EPOCH
 Honour the SOURCE_DATE_EPOCH environment variable to get reproducible
 timestamps if it is set.
 See https://reproducible-builds.org/specs/source-date-epoch/
Author: Alexis Bienvenüe <p...@passoire.fr>

Index: pyx-0.12.1/pyx/pdfwriter.py
===================================================================
--- pyx-0.12.1.orig/pyx/pdfwriter.py
+++ pyx-0.12.1/pyx/pdfwriter.py
@@ -29,7 +29,7 @@ except:
     haszlib = 0
 
 import bbox, config, style, unit, version, trafo
-
+import os
 
 
 class PDFregistry:
@@ -161,13 +161,18 @@ class PDFinfo(PDFobject):
         PDFobject.__init__(self, "info")
 
     def write(self, file, writer, registry):
-        if time.timezone < 0:
-            # divmod on positive numbers, otherwise the minutes have a different sign from the hours
-            timezone = "-%02i'%02i'" % divmod(-time.timezone/60, 60)
-        elif time.timezone > 0:
-            timezone = "+%02i'%02i'" % divmod(time.timezone/60, 60)
-        else:
+        if os.environ.get('SOURCE_DATE_EPOCH'):
+            creation_date = time.gmtime(int(os.environ.get('SOURCE_DATE_EPOCH')))
             timezone = "Z00'00'"
+        else:
+            if time.timezone < 0:
+                # divmod on positive numbers, otherwise the minutes have a different sign from the hours
+                timezone = "-%02i'%02i'" % divmod(-time.timezone/60, 60)
+            elif time.timezone > 0:
+                timezone = "+%02i'%02i'" % divmod(time.timezone/60, 60)
+            else:
+                timezone = "Z00'00'"
+            creation_date = time.localtime()
 
         def pdfstring(s):
             r = ""
@@ -188,7 +193,8 @@ class PDFinfo(PDFobject):
         if writer.keywords:
             file.write("/Keywords (%s)\n" % pdfstring(writer.keywords))
         file.write("/Creator (PyX %s)\n" % version.version)
-        file.write("/CreationDate (D:%s%s)\n" % (time.strftime("%Y%m%d%H%M"), timezone))
+        file.write("/CreationDate (D:%s%s)\n" %
+                   (time.strftime("%Y%m%d%H%M",creation_date), timezone))
         file.write(">>\n")
 
 
Index: pyx-0.12.1/pyx/pswriter.py
===================================================================
--- pyx-0.12.1.orig/pyx/pswriter.py
+++ pyx-0.12.1/pyx/pswriter.py
@@ -22,6 +22,7 @@
 
 import cStringIO, copy, time, math
 import bbox, config, style, version, unit, trafo
+import os
 
 
 class PSregistry:
@@ -107,11 +108,15 @@ class _PSwriter:
         self.encodings = {}
 
     def writeinfo(self, file):
+        if os.environ.get('SOURCE_DATE_EPOCH'):
+            creation_date = time.gmtime(int(os.environ.get('SOURCE_DATE_EPOCH')))
+        else:
+            creation_date = time.localtime()
         file.write("%%%%Creator: PyX %s\n" % version.version)
         if self.title is not None:
             file.write("%%%%Title: %s\n" % self.title)
         file.write("%%%%CreationDate: %s\n" %
-                   time.asctime(time.localtime(time.time())))
+                   time.asctime(creation_date))
 
     def getfontmap(self):
         if self._fontmap is None:
Description: Reproducible image name
 Use data hash instead of memory address to build internal image names,
 to get a reproducible result.
Author: Alexis Bienvenüe <p...@passoire.fr>

Index: pyx-0.12.1/pyx/bitmap.py
===================================================================
--- pyx-0.12.1.orig/pyx/bitmap.py
+++ pyx-0.12.1/pyx/bitmap.py
@@ -27,7 +27,7 @@ try:
 except:
     haszlib = 0
 
-import bbox, canvasitem, pswriter, pdfwriter, trafo, unit
+import bbox, canvasitem, pswriter, pdfwriter, trafo, unit, hashlib
 
 devicenames = {"L": "/DeviceGray",
                "RGB": "/DeviceRGB",
@@ -314,7 +314,7 @@ class bitmap_trafo(canvasitem.canvasitem
     def imagedata(self, interleavealpha):
         """internal function
 
-        returns a tuple (mode, data, alpha, palettemode, palettedata)
+        returns a tuple (mode, data, alpha, palettemode, palettedata, imagehash)
         where mode does not contain antialiasing anymore
         """
 
@@ -383,7 +383,9 @@ class bitmap_trafo(canvasitem.canvasitem
             else:
                 alpha = alpha.tostring()
 
-        return mode, data, alpha, palettemode, palettedata
+        imagehash = hashlib.sha1(data).hexdigest()[0:15]
+
+        return mode, data, alpha, palettemode, palettedata, imagehash
 
     def bbox(self):
         bb = bbox.empty()
@@ -394,14 +396,14 @@ class bitmap_trafo(canvasitem.canvasitem
         return bb
 
     def processPS(self, file, writer, context, registry, bbox):
-        mode, data, alpha, palettemode, palettedata = self.imagedata(True)
+        mode, data, alpha, palettemode, palettedata, imagehash = self.imagedata(True)
         pstrafo = trafo.translate_pt(0, -1.0).scaled(self.imagewidth, -self.imageheight)*self.pdftrafo.inverse()
 
         PSsinglestring = self.PSstoreimage and len(data) < self.PSmaxstrlen
         if PSsinglestring:
-            PSimagename = "image-%d-%s-singlestring" % (id(self.image), self.compressmode)
+            PSimagename = "image-%s-%s-singlestring" % (imagehash, self.compressmode)
         else:
-            PSimagename = "image-%d-%s-stringarray" % (id(self.image), self.compressmode)
+            PSimagename = "image-%s-%s-stringarray" % (imagehash, self.compressmode)
 
         if self.PSstoreimage and not PSsinglestring:
             registry.add(pswriter.PSdefinition("imagedataaccess",
@@ -489,9 +491,9 @@ class bitmap_trafo(canvasitem.canvasitem
         file.write("grestore\n")
 
     def processPDF(self, file, writer, context, registry, bbox):
-        mode, data, alpha, palettemode, palettedata = self.imagedata(False)
+        mode, data, alpha, palettemode, palettedata, imagehash = self.imagedata(False)
 
-        name = "image-%d-%s" % (id(self.image), self.compressmode or self.imagecompressed)
+        name = "image-%s-%s" % (imagehash, self.compressmode or self.imagecompressed)
         if alpha:
             alpha = PDFimage("%s-smask" % name, self.imagewidth, self.imageheight,
                              None, None, "L", 8,

Reply via email to