commit 048e05ee3ef3735afbf558ea0bc42127572010a9
Author: Jean-Francois Dockes <jf@dockes.org>
Date:   Mon Nov 18 10:31:43 2013 +0100

    Account for 8-bit characters inside TextBytesAtom
    
    The Microsoft documentation does not specify an encoding for TextBytesAtom,
    but it says that the bytes are the "low byte of a character in the Unicode
    character set with the high byte considered equal to zero". As characters
    from the latin1 set are the only ones which would not be changed by this
    operation, and cp1252/windows-1252 agrees with latin1 on every printable
    character, it seems reasonable to assume that the encoding is actually
    cp1252/windows-1252, which is what is used by the new version.

diff --git a/src/pptrecord.py b/src/pptrecord.py
index ae7bad7..f29d590 100644
--- a/src/pptrecord.py
+++ b/src/pptrecord.py
@@ -117,6 +117,10 @@ class String(BaseRecordHandler):
     def parseBytes (self):
         name = globals.getTextBytes(self.readRemainingBytes())
         self.appendProperty(name)
+        # The MS doc says that the bytes are the low bytes of Unicode
+        # chars, with the high byte ignored. Only latin1 survives this
+        # transformation, so assume cp1252 (the closest Windows code page).
+        name = name.decode('cp1252').encode('UTF-8')
         self.appendLine("text: '%s'"%name)
 
 def ShapeString (*args):
