poppler/Lexer.cc | 45 +++++++++++++++++++++++++++++++++++++++++++++ poppler/Lexer.h | 2 ++ poppler/Parser.cc | 28 ++++++++++++++++++++++++++-- poppler/Parser.h | 2 ++ 4 files changed, 75 insertions(+), 2 deletions(-)
New commits: commit e1ffa9100cf6b4a444be7ed76b11698a5c5bb441 Author: Thomas Freitag <[email protected]> Date: Sat Apr 6 23:21:58 2013 +0200 Fix endstream detection Part 1 of bug #62985: the endstream search. At least with bug-poppler16579.pdf this doesn't work correctly: the shift(-1) with the token mechanism used in Lexer isn't correct for a binary data stream. If there is, e.g., a "(" without a corresponding ")" in the binary data, which of course can happen and does happen in that PDF, shift(-1) skips the searched-for endstream and can therefore, in the worst case, reach the end of the file. Therefore I implemented a shift("endstream") in Java, which I now port back to C++, or in other words "There and Back Again" :-) You can test it with bug-poppler16579.pdf if you just temporarily change if (longNumber <= INT_MAX && longNumber >= INT_MIN && *end_ptr == '\0') { in XRef.cc to if (gFalse && longNumber <= INT_MAX && longNumber >= INT_MIN && *end_ptr == '\0') { diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc index a0bb35e..4e9ea12 100644 --- a/poppler/Lexer.cc +++ b/poppler/Lexer.cc @@ -17,6 +17,7 @@ // Copyright (C) 2006 Krzysztof Kowalczyk <[email protected]> // Copyright (C) 2010 Carlos Garcia Campos <[email protected]> // Copyright (C) 2012, 2013 Adrian Johnson <[email protected]> +// Copyright (C) 2013 Thomas Freitag <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -580,6 +581,50 @@ Object *Lexer::getObj(Object *obj, int objNum) { return obj; } +Object *Lexer::getObj(Object *obj, const char *cmdA) { + char *p; + int c, c2; + GBool comment, done; + int numParen; + GooString *s; + int n, m; + + // skip whitespace and comments + comment = gFalse; + const char *cmd1 = tokBuf; + *tokBuf = 0; + while (strcmp(cmdA, cmd1)) { + while (1) { + if ((c = getChar()) == EOF) { + return obj->initEOF(); + } + if (comment) { + if (c == '\r' || c == '\n') { + comment = 
gFalse; + } + } else if (c == '%') { + comment = gTrue; + } else if (specialChars[c] != 1) { + break; + } + } + p = tokBuf; + *p++ = c; + n = 1; + while ((c = lookChar()) != EOF && specialChars[c] == 0) { + getChar(); + if (++n == tokBufSize) { + break; + } + *p++ = c; + } + *p = '\0'; + } + obj->initCmd(tokBuf); + + return obj; +} + void Lexer::skipToNextLine() { int c; diff --git a/poppler/Lexer.h b/poppler/Lexer.h index 227508f..d9c23dc 100644 --- a/poppler/Lexer.h +++ b/poppler/Lexer.h @@ -16,6 +16,7 @@ // Copyright (C) 2006, 2007, 2010 Albert Astals Cid <[email protected]> // Copyright (C) 2006 Krzysztof Kowalczyk <[email protected]> // Copyright (C) 2013 Adrian Johnson <[email protected]> +// Copyright (C) 2013 Thomas Freitag <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -56,6 +57,7 @@ public: // Get the next object from the input stream. Object *getObj(Object *obj, int objNum = -1); + Object *getObj(Object *obj, const char *cmdA); // Skip to the beginning of the next line in the input stream. 
void skipToNextLine(); diff --git a/poppler/Parser.cc b/poppler/Parser.cc index b66203f..0370564 100644 --- a/poppler/Parser.cc +++ b/poppler/Parser.cc @@ -242,7 +242,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey, // refill token buffers and check for 'endstream' shift(); // kill '>>' - shift(); // kill 'stream' + shift("endstream"); // kill 'stream' if (buf1.isCmd("endstream")) { shift(); } else { @@ -251,7 +251,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey, if (xref) { // shift until we find the proper endstream or we change to another object or reach eof while (!buf1.isCmd("endstream") && xref->getNumEntry(lexer->getPos()) == objNum && !buf1.isEOF()) { - shift(); + shift("endstream"); } length = lexer->getPos() - pos; if (buf1.isCmd("endstream")) { @@ -302,3 +302,27 @@ void Parser::shift(int objNum) { else lexer->getObj(&buf2, objNum); } + +void Parser::shift(const char *cmdA) { + if (inlineImg > 0) { + if (inlineImg < 2) { + ++inlineImg; + } else { + // in a damaged content stream, if 'ID' shows up in the middle + // of a dictionary, we need to reset + inlineImg = 0; + } + } else if (buf2.isCmd("ID")) { + lexer->skipChar(); // skip char after 'ID' command + inlineImg = 1; + } + buf1.free(); + buf2.shallowCopy(&buf1); + if (inlineImg > 0) { + buf2.initNull(); + } else if (buf1.isCmd(cmdA)) { + lexer->getObj(&buf2, -1); + } else { + lexer->getObj(&buf2, cmdA); + } +} diff --git a/poppler/Parser.h b/poppler/Parser.h index adaf913..9702716 100644 --- a/poppler/Parser.h +++ b/poppler/Parser.h @@ -16,6 +16,7 @@ // Copyright (C) 2006, 2010 Albert Astals Cid <[email protected]> // Copyright (C) 2012 Hib Eris <[email protected]> // Copyright (C) 2013 Adrian Johnson <[email protected]> +// Copyright (C) 2013 Thomas Freitag <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -74,6 +75,7 @@ private: int objNum, int 
objGen, int recursion, GBool strict); void shift(int objNum = -1); + void shift(const char *cmdA); }; #endif _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
