Hi!
Here is a patch I made (against 0.54.0, but it also applies cleanly to
0.56.0) to handle PDFs whose StructElem objects use unknown type names.
Regards, Jan-Erik
diff -aur orig-poppler-0.54.0/poppler/GlobalParams.cc poppler-0.54.0/poppler/GlobalParams.cc
--- orig-poppler-0.54.0/poppler/GlobalParams.cc 2016-12-06 22:48:27.000000000 +0000
+++ poppler-0.54.0/poppler/GlobalParams.cc 2017-06-08 12:10:12.444036161 +0000
@@ -607,6 +607,7 @@
#endif
textPageBreaks = gTrue;
textKeepTinyChars = gFalse;
+ handleStructElemError = gFalse;
enableFreeType = gTrue;
strokeAdjust = gTrue;
screenType = screenUnset;
@@ -1569,6 +1570,15 @@
return tiny;
}
+GBool GlobalParams::getHandleStructElemError() { // returns the current handleStructElemError flag
+ GBool f; // local copy, so the value is returned after the unlock below
+
+ lockGlobalParams; // NOTE(review): presumably acquires the GlobalParams mutex - macro not shown here, confirm
+ f = handleStructElemError; // read the flag between the lock/unlock pair
+ unlockGlobalParams;
+ return f;
+}
+
GBool GlobalParams::getEnableFreeType() {
GBool f;
@@ -1858,6 +1868,12 @@
unlockGlobalParams;
}
+void GlobalParams::setHandleStructElemError(GBool handle) { // stores 'handle' in the handleStructElemError flag
+ lockGlobalParams; // NOTE(review): presumably acquires the GlobalParams mutex - macro not shown here, confirm
+ handleStructElemError = handle; // when true, StructElement.cc treats an unknown StructElem type as Part and warns
+ unlockGlobalParams;
+}
+
GBool GlobalParams::setEnableFreeType(char *s) {
GBool ok;
diff -aur orig-poppler-0.54.0/poppler/GlobalParams.h poppler-0.54.0/poppler/GlobalParams.h
--- orig-poppler-0.54.0/poppler/GlobalParams.h 2017-02-19 22:25:43.000000000 +0000
+++ poppler-0.54.0/poppler/GlobalParams.h 2017-06-08 12:10:41.356233859 +0000
@@ -169,6 +169,7 @@
EndOfLineKind getTextEOL();
GBool getTextPageBreaks();
GBool getTextKeepTinyChars();
+ GBool getHandleStructElemError();
GBool getEnableFreeType();
GBool getStrokeAdjust();
ScreenType getScreenType();
@@ -207,6 +208,7 @@
GBool setTextEOL(char *s);
void setTextPageBreaks(GBool pageBreaks);
void setTextKeepTinyChars(GBool keep);
+ void setHandleStructElemError(GBool handle);
GBool setEnableFreeType(char *s);
GBool setDisableFreeTypeHinting(char *s);
void setStrokeAdjust(GBool strokeAdjust);
@@ -294,6 +296,8 @@
// output
GBool textPageBreaks; // insert end-of-page markers?
GBool textKeepTinyChars; // keep all characters in text output
+ GBool handleStructElemError; // flag to enable more graceful handling
+ // of StructElem objects of unknown type
GBool enableFreeType; // FreeType enable flag
GBool disableFreeTypeHinting; // FreeType disable hinting flag
GBool strokeAdjust; // stroke adjustment enable flag
diff -aur orig-poppler-0.54.0/poppler/StructElement.cc poppler-0.54.0/poppler/StructElement.cc
--- orig-poppler-0.54.0/poppler/StructElement.cc 2016-12-06 22:48:27.000000000 +0000
+++ poppler-0.54.0/poppler/StructElement.cc 2017-06-08 12:22:09.677968867 +0000
@@ -1115,9 +1115,18 @@
// At this point either the type name must have been resolved.
if (type == Unknown) {
- error(errSyntaxError, -1, "StructElem object is wrong type ({0:s})", obj.getName());
- obj.free();
- return;
+ // Try to handle broken PDF files anyway?
+ if (globalParams->getHandleStructElemError()) {
+ // Try to handle this situation by assuming that type is Part.
+ type = Part;
+ // Issue informative warning:
+ error(errSyntaxWarning, -1, "StructElem object has unknown type ({0:s}), assuming 'Part'", obj.getName());
+ }
+ else {
+ error(errSyntaxError, -1, "StructElem object is wrong type ({0:s})", obj.getName());
+ obj.free();
+ return;
+ }
}
obj.free();
_______________________________________________
poppler mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/poppler