Hi!
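Here is a patch I made (against 0.54.0, but it also applies cleanly to
0.56.0) to handle PDFs whose StructElem objects use unknown type names.
It adds a GlobalParams flag, handleStructElemError (off by default);
when the flag is set, such an element is assumed to be of type 'Part'
and a warning is issued, instead of reporting a syntax error and
returning early.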

Regards, Jan-Erik

diff -aur orig-poppler-0.54.0/poppler/GlobalParams.cc poppler-0.54.0/poppler/GlobalParams.cc
--- orig-poppler-0.54.0/poppler/GlobalParams.cc	2016-12-06 22:48:27.000000000 +0000
+++ poppler-0.54.0/poppler/GlobalParams.cc	2017-06-08 12:10:12.444036161 +0000
@@ -607,6 +607,7 @@
 #endif
   textPageBreaks = gTrue;
   textKeepTinyChars = gFalse;
+  handleStructElemError = gFalse;
   enableFreeType = gTrue;
   strokeAdjust = gTrue;
   screenType = screenUnset;
@@ -1569,6 +1570,15 @@
   return tiny;
 }
 
+GBool GlobalParams::getHandleStructElemError() {
+  GBool f;
+
+  lockGlobalParams;
+  f = handleStructElemError;
+  unlockGlobalParams;
+  return f;
+}
+
 GBool GlobalParams::getEnableFreeType() {
   GBool f;
 
@@ -1858,6 +1868,12 @@
   unlockGlobalParams;
 }
 
+void GlobalParams::setHandleStructElemError(GBool handle) {
+  lockGlobalParams;
+  handleStructElemError = handle;
+  unlockGlobalParams;
+}
+
 GBool GlobalParams::setEnableFreeType(char *s) {
   GBool ok;
 
diff -aur orig-poppler-0.54.0/poppler/GlobalParams.h poppler-0.54.0/poppler/GlobalParams.h
--- orig-poppler-0.54.0/poppler/GlobalParams.h	2017-02-19 22:25:43.000000000 +0000
+++ poppler-0.54.0/poppler/GlobalParams.h	2017-06-08 12:10:41.356233859 +0000
@@ -169,6 +169,7 @@
   EndOfLineKind getTextEOL();
   GBool getTextPageBreaks();
   GBool getTextKeepTinyChars();
+  GBool getHandleStructElemError();
   GBool getEnableFreeType();
   GBool getStrokeAdjust();
   ScreenType getScreenType();
@@ -207,6 +208,7 @@
   GBool setTextEOL(char *s);
   void setTextPageBreaks(GBool pageBreaks);
   void setTextKeepTinyChars(GBool keep);
+  void setHandleStructElemError(GBool handle);
   GBool setEnableFreeType(char *s);
   GBool setDisableFreeTypeHinting(char *s);
   void setStrokeAdjust(GBool strokeAdjust);
@@ -294,6 +296,8 @@
 				//   output
   GBool textPageBreaks;		// insert end-of-page markers?
   GBool textKeepTinyChars;	// keep all characters in text output
+  GBool handleStructElemError;	// flag to enable more graceful handling
+				//   of StructElem objects of unknown type
   GBool enableFreeType;		// FreeType enable flag
   GBool disableFreeTypeHinting;	// FreeType disable hinting flag
   GBool strokeAdjust;		// stroke adjustment enable flag
diff -aur orig-poppler-0.54.0/poppler/StructElement.cc poppler-0.54.0/poppler/StructElement.cc
--- orig-poppler-0.54.0/poppler/StructElement.cc	2016-12-06 22:48:27.000000000 +0000
+++ poppler-0.54.0/poppler/StructElement.cc	2017-06-08 12:22:09.677968867 +0000
@@ -1115,9 +1115,18 @@
 
   // At this point either the type name must have been resolved.
   if (type == Unknown) {
-    error(errSyntaxError, -1, "StructElem object is wrong type ({0:s})", obj.getName());
-    obj.free();
-    return;
+    // Try to handle broken PDF files anyway?
+    if (globalParams->getHandleStructElemError()) {
+      // Try to handle this situation by assuming that type is Part.
+      type = Part;
+      // Issue informative warning:
+      error(errSyntaxWarning, -1, "StructElem object has unknown type ({0:s}), assuming 'Part'", obj.getName());
+    }
+    else {
+      error(errSyntaxError, -1, "StructElem object is wrong type ({0:s})", obj.getName());
+      obj.free();
+      return;
+    }
   }
   obj.free();
 
_______________________________________________
poppler mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to