poppler/PDFDoc.cc | 24 ++++++++++++---- poppler/XRef.cc | 80 +++++++++++++++++++++++++++++++----------------------- poppler/XRef.h | 5 ++- 3 files changed, 68 insertions(+), 41 deletions(-)
New commits: commit 7fc3c21a8c5d6cf8517100427b182887a9569ed0 Author: Ilya Gorenbein <[email protected]> Date: Fri Aug 20 20:24:31 2010 +0100 Fix failure to parse PDF with damaged internal structure Patch in bug 29189, fixes bug 3870 diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 33a2b4d..e4ac639 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -23,6 +23,7 @@ // Copyright (C) 2009 Axel Struebing <[email protected]> // Copyright (C) 2010 Hib Eris <[email protected]> // Copyright (C) 2010 Jakub Wilk <[email protected]> +// Copyright (C) 2010 Ilya Gorenbein <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -205,8 +206,10 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) { // check header checkHeader(); + GBool wasReconstructed = false; + // read xref table - xref = new XRef(str); + xref = new XRef(str, &wasReconstructed); if (!xref->isOk()) { error(-1, "Couldn't read xref table"); errCode = xref->getErrorCode(); @@ -221,10 +224,21 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) { // read catalog catalog = new Catalog(xref); - if (!catalog->isOk()) { - error(-1, "Couldn't read page catalog"); - errCode = errBadCatalog; - return gFalse; + if (catalog && !catalog->isOk()) { + if (!wasReconstructed) + { + // try one more time to contruct the Catalog, maybe the problem is damaged XRef + delete catalog; + delete xref; + xref = new XRef(str, NULL, true); + catalog = new Catalog(xref); + } + + if (catalog && !catalog->isOk()) { + error(-1, "Couldn't read page catalog"); + errCode = errBadCatalog; + return gFalse; + } } // done diff --git a/poppler/XRef.cc b/poppler/XRef.cc index a9cf571..0cd4be0 100644 --- a/poppler/XRef.cc +++ b/poppler/XRef.cc @@ -18,7 +18,7 @@ // Copyright (C) 2006, 2008, 2010 Albert Astals Cid <[email protected]> // Copyright (C) 2007-2008 Julien Rebetez <[email protected]> // Copyright (C) 2007 Carlos Garcia Campos <[email protected]> -// Copyright (C) 2009 Ilya Gorenbein <[email protected]> +// Copyright (C) 2009, 2010 Ilya Gorenbein <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -268,7 +268,7 @@ XRef::XRef() { objStrs = new PopplerCache(5); } -XRef::XRef(BaseStream *strA) { +XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) { Guint pos; Object obj; @@ -289,43 +289,50 @@ XRef::XRef(BaseStream *strA) { start = str->getStart(); pos = getStartXref(); - // if there was a problem with the 'startxref' position, try to - // reconstruct the xref table - if (pos == 0) { - if (!(ok = constructXRef())) { - errCode = errDamaged; - return; - } - - // read the xref table - } else { - GooVector<Guint> followedXRefStm; - while (readXRef(&pos, &followedXRefStm)) ; + if (reconstruct && !(ok = constructXRef(wasReconstructed))) + { + errCode = errDamaged; + return; + } + else + { + // if there was a problem with the 'startxref' position, try to + // reconstruct the xref table + if (pos == 0) { + if (!(ok = constructXRef(wasReconstructed))) { + errCode = errDamaged; + return; + } - // if there was a problem with the xref table, - // try to reconstruct it - if (!ok) { - if (!(ok = constructXRef())) { - errCode = errDamaged; - return; + // read the xref table + } else { + GooVector<Guint> followedXRefStm; + while (readXRef(&pos, &followedXRefStm)) ; + + // if there was a problem with the xref table, + // try to reconstruct it + if (!ok) { + if (!(ok = constructXRef(wasReconstructed))) { + errCode = errDamaged; + return; + } } } - } - // get the root dictionary (catalog) object - trailerDict.dictLookupNF("Root", &obj); - if (obj.isRef()) { - rootNum = obj.getRefNum(); - rootGen = obj.getRefGen(); - obj.free(); - } else { - obj.free(); - if (!(ok = constructXRef())) { - errCode = errDamaged; - return; + // get the root dictionary (catalog) object + trailerDict.dictLookupNF("Root", &obj); + if (obj.isRef()) { + rootNum = obj.getRefNum(); + rootGen = obj.getRefGen(); + obj.free(); + } else { + obj.free(); + if (!(ok = constructXRef(wasReconstructed))) { + errCode = errDamaged; + return; + } } } - // now set the trailer dictionary's xref pointer so we can fetch // indirect objects from it trailerDict.getDict()->setXRef(this); @@ -746,7 +753,7 @@ GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) { } // Attempt to construct an xref table for a damaged file. -GBool XRef::constructXRef() { +GBool XRef::constructXRef(GBool *wasReconstructed) { Parser *parser; Object newTrailerDict, obj; char buf[256]; @@ -769,6 +776,11 @@ GBool XRef::constructXRef() { gotRoot = gFalse; streamEndsLen = streamEndsSize = 0; + if (wasReconstructed) + { + *wasReconstructed = true; + } + str->reset(); while (1) { pos = str->getPos(); diff --git a/poppler/XRef.h b/poppler/XRef.h index be19e23..1f4ec6a 100644 --- a/poppler/XRef.h +++ b/poppler/XRef.h @@ -17,6 +17,7 @@ // Copyright (C) 2006, 2008, 2010 Albert Astals Cid <[email protected]> // Copyright (C) 2007-2008 Julien Rebetez <[email protected]> // Copyright (C) 2007 Carlos Garcia Campos <[email protected]> +// Copyright (C) 2010 Ilya Gorenbein <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -63,7 +64,7 @@ public: // Constructor, create an empty XRef, used for PDF writing XRef(); // Constructor. Read xref table from stream. - XRef(BaseStream *strA); + XRef(BaseStream *strA, GBool *wasReconstructed = NULL, GBool reconstruct = false); // Destructor. ~XRef(); @@ -161,7 +162,7 @@ private: GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm); GBool readXRefStreamSection(Stream *xrefStr, int *w, int first, int n); GBool readXRefStream(Stream *xrefStr, Guint *pos); - GBool constructXRef(); + GBool constructXRef(GBool *wasReconstructed); Guint strToUnsigned(char *s); }; _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
