poppler/Gfx.cc | 6 ++++++ poppler/OutputDev.h | 4 ++++ poppler/TextOutputDev.h | 4 ++++ 3 files changed, 14 insertions(+)
New commits: Author: Peter Waller <[email protected]> Date: Wed, 27 May 2015 22:02:28 +0100 If the font has no Unicode cmap, it's not possible to output text for that encoding, so rather than potentially corrupting the textual output, the characters are dropped. These characters are kept for rendering. It may be possible to keep the characters for text output if they happen to lie in the set of printable characters, but my first priority is to fix crashes where the glib API returns an inconsistent number of glyphs via poppler_page_get_text and poppler_page_get_text_layout. Original bug: https://bugs.freedesktop.org/show_bug.cgi?id=73885 diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc index 07d95b3..130363d 100644 --- a/poppler/Gfx.cc +++ b/poppler/Gfx.cc @@ -3934,6 +3934,12 @@ void Gfx::doShowText(GooString *s) { int len, n, uLen, nChars, nSpaces, i; font = state->getFont(); + + if (out->needUnicodeText() && !font->hasToUnicodeCMap()) { + // No conversion to unicode available, drop characters. + return; + } + wMode = font->getWMode(); if (out->useDrawChar()) { diff --git a/poppler/OutputDev.h b/poppler/OutputDev.h index e8a7a47..7e63739 100644 --- a/poppler/OutputDev.h +++ b/poppler/OutputDev.h @@ -116,6 +116,10 @@ public: // Does this device need non-text content? virtual GBool needNonText() { return gTrue; } + // Does this device expect valid UTF-8 text? (i.e., discard characters for + // which it cannot determine UTF-8 equivalents due to a missing unicode mapping) + virtual GBool needUnicodeText() { return gFalse; } + // Does this device require incCharCount to be called for text on // non-shown layers? virtual GBool needCharCount() { return gFalse; } diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index a0aa6f8..8bbd018 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -762,6 +762,10 @@ public: // Does this device need non-text content? virtual GBool needNonText() { return gFalse; } + // Does this device expect valid UTF-8 text? 
(i.e., discard characters for + // which it cannot determine UTF-8 equivalents due to a missing unicode mapping) + virtual GBool needUnicodeText() { return gTrue; } + // Does this device require incCharCount to be called for text on // non-shown layers? virtual GBool needCharCount() { return gTrue; } -- 1.9.1 _______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
