src/docrecord.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/docsprm.py | 11 ++++++ src/docstream.py | 8 ++++ 3 files changed, 110 insertions(+), 2 deletions(-)
New commits: commit 044364980ff76108d0cc479fec40ae031110ac51 Author: Miklos Vajna <[email protected]> Date: Fri Nov 9 17:26:31 2012 +0100 dump Chpx diff --git a/src/docrecord.py b/src/docrecord.py index 0694cd8..4aaeaa6 100755 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -131,8 +131,12 @@ class Sprm(DOCDirStream): 4: 'section', 5: 'table' } + nameMap = { + 1: docsprm.parMap, + 2: docsprm.chrMap, + } print '<sprm value="%s" name="%s" ispmd="%s" fSpec="%s" sgc="%s" spra="%s" operandSize="%s" operand="%s"/>' % ( - hex(self.sprm), docsprm.parMap[self.sprm], hex(self.ispmd), hex(self.fSpec), sgcmap[self.sgc], hex(self.spra), self.getOperandSize(), hex(self.operand) + hex(self.sprm), nameMap[self.sgc][self.sprm], hex(self.ispmd), hex(self.fSpec), sgcmap[self.sgc], hex(self.spra), self.getOperandSize(), hex(self.operand) ) def getOperandSize(self): @@ -172,6 +176,23 @@ class GrpPrlAndIstd(DOCDirStream): pos += prl.getSize() print '</grpPrlAndIstd>' +class Chpx(DOCDirStream): + """The Chpx structure specifies a set of properties for text.""" + def __init__(self, bytes, mainStream, offset): + DOCDirStream.__init__(self, bytes) + self.pos = offset + + def dump(self): + print '<chpx type="Chpx" offset="%d">' % self.pos + self.printAndSet("cb", self.getInt8()) + self.pos += 1 + pos = self.pos + while (self.cb - (pos - self.pos)) > 0: + prl = Prl(self.bytes, pos) + prl.dump() + pos += prl.getSize() + print '</chpx>' + class PapxInFkp(DOCDirStream): """The PapxInFkp structure specifies a set of text properties.""" def __init__(self, bytes, mainStream, offset): @@ -229,6 +250,10 @@ class ChpxFkp(DOCDirStream): pos += 4 # rgbx + offset = PLC.getPLCOffset(self.pos, self.crun, 1, i) + chpxOffset = self.getInt8(pos = offset) * 2 + chpx = Chpx(self.bytes, self.mainStream, self.pos + chpxOffset) + chpx.dump() print '</rgfc>' self.printAndSet("crun", self.crun) diff --git a/src/docsprm.py b/src/docsprm.py index b60d782..32e52d8 100755 --- a/src/docsprm.py +++ b/src/docsprm.py @@ -95,4 +95,15 @@ parMap = { 0x2471: "sprmPTtwo", } +# TODO incomplete +chrMap = { + 0x4A43: "sprmCHps", + 0x6816: "sprmCRsidText", + 0x4A4F: "sprmCRgFtc0", + 0x4A51: "sprmCRgFtc2", + 0x4A61: "sprmCHpsBi", + 0x0835: "sprmCFBold", + 0x085C: "sprmCFBoldBi", + } + # vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab: commit e0176f0bbd9012e59e66df437a3069b3362c3902 Author: Miklos Vajna <[email protected]> Date: Fri Nov 9 17:06:59 2012 +0100 dump ChpxFkp diff --git a/src/docrecord.py b/src/docrecord.py index a817a33..0694cd8 100755 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -209,6 +209,31 @@ class BxPap(DOCDirStream): def getSize(): return 13 # in bytes, see 2.9.23 +class ChpxFkp(DOCDirStream): + """The ChpxFkp structure maps text to its character properties.""" + def __init__(self, bytes, mainStream, offset, size): + DOCDirStream.__init__(self, mainStream.bytes) + self.pos = offset + self.size = size + + def dump(self): + print '<chpxFkp type="ChpxFkp" offset="%d" size="%d bytes">' % (self.pos, self.size) + self.crun = self.getInt8(pos = self.pos + self.size - 1) + pos = self.pos + for i in range(self.crun): + # rgfc + start = self.getInt32(pos = pos) + end = self.getInt32(pos = pos + 4) + print '<rgfc index="%d" start="%d" end="%d">' % (i, start, end) + print '<transformed value="%s"/>' % globals.encodeName(self.bytes[start:end]) + pos += 4 + + # rgbx + print '</rgfc>' + + self.printAndSet("crun", self.crun) + print '</chpxFkp>' + class PapxFkp(DOCDirStream): """The PapxFkp structure maps paragraphs, table rows, and table cells to their properties.""" def __init__(self, bytes, mainStream, offset, size): @@ -250,6 +275,8 @@ class PnFkpChpx(DOCDirStream): buf = self.getInt32() self.pos += 4 self.printAndSet("pn", buf & (2**22-1)) + chpxFkp = ChpxFkp(self.bytes, self.mainStream, self.pn*512, 512) + chpxFkp.dump() print '</%s>' % self.name class PnFkpPapx(DOCDirStream): commit 6fd34191f47b5b18d143459d143d96508af6923f Author: Miklos Vajna <[email protected]> Date: Fri Nov 9 17:03:40 2012 +0100 dump PlcBteChpx and PnFkpChpx diff --git a/src/docrecord.py b/src/docrecord.py index bc83750..a817a33 100755 --- a/src/docrecord.py +++ b/src/docrecord.py @@ -237,6 +237,21 @@ class PapxFkp(DOCDirStream): self.printAndSet("cpara", self.cpara) print '</papxFkp>' +class PnFkpChpx(DOCDirStream): + """The PnFkpChpx structure specifies the location in the WordDocument Stream of a ChpxFkp structure.""" + def __init__(self, bytes, mainStream, offset, size, name): + DOCDirStream.__init__(self, bytes, mainStream=mainStream) + self.pos = offset + self.size = size + self.name = name + + def dump(self): + print '<%s type="PnFkpChpx" offset="%d" size="%d bytes">' % (self.name, self.pos, self.size) + buf = self.getInt32() + self.pos += 4 + self.printAndSet("pn", buf & (2**22-1)) + print '</%s>' % self.name + class PnFkpPapx(DOCDirStream): """The PnFkpPapx structure specifies the offset of a PapxFkp in the WordDocument Stream.""" def __init__(self, bytes, mainStream, offset, size, name): @@ -254,6 +269,30 @@ class PnFkpPapx(DOCDirStream): papxFkp.dump() print '</%s>' % self.name +class PlcBteChpx(DOCDirStream, PLC): + """The PlcBteChpx structure is a PLC that maps the offsets of text in the WordDocument stream to the character properties of that text.""" + def __init__(self, bytes, mainStream, offset, size): + DOCDirStream.__init__(self, bytes, mainStream=mainStream) + PLC.__init__(self, size, 4) + self.pos = offset + self.size = size + + def dump(self): + print '<plcBteChpx type="PlcBteChpx" offset="%d" size="%d bytes">' % (self.pos, self.size) + pos = self.pos + for i in range(self.getElements()): + # aFC + start = self.getInt32(pos = pos) + end = self.getInt32(pos = pos + 4) + print '<aFC index="%d" start="%d" end="%d">' % (i, start, end) + pos += 4 + + # aPnBteChpx + aPnBteChpx = PnFkpChpx(self.bytes, self.mainStream, self.getOffset(self.pos, i), 4, "aPnBteChpx") + aPnBteChpx.dump() + print '</aFC>' + print '</plcBteChpx>' + class PlcBtePapx(DOCDirStream, PLC): """The PlcBtePapx structure is a PLC that specifies paragraph, table row, or table cell properties.""" def __init__(self, bytes, mainStream, offset, size): diff --git a/src/docstream.py b/src/docstream.py index 16657df..9aeb9c0 100755 --- a/src/docstream.py +++ b/src/docstream.py @@ -212,7 +212,7 @@ class WordDocumentStream(DOCDirStream): ["fcPlcfHdd"], ["lcbPlcfHdd"], ["fcPlcfBteChpx"], - ["lcbPlcfBteChpx"], + ["lcbPlcfBteChpx", self.handleLcbPlcfBteChpx], ["fcPlcfBtePapx"], ["lcbPlcfBtePapx", self.handleLcbPlcfBtePapx], ["fcPlcfSea"], @@ -387,6 +387,12 @@ class WordDocumentStream(DOCDirStream): clx = docrecord.Clx(self.doc.getDirectoryStreamByName("1Table").bytes, self, offset, size) clx.dump() + def handleLcbPlcfBteChpx(self): + offset = self.fcPlcfBteChpx + size = self.lcbPlcfBteChpx + plcBteChpx = docrecord.PlcBteChpx(self.doc.getDirectoryStreamByName("1Table").bytes, self, offset, size) + plcBteChpx.dump() + def handleLcbPlcfBtePapx(self): offset = self.fcPlcfBtePapx size = self.lcbPlcfBtePapx _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
