Currently, Annot::layoutText outputs a warning to stderr when it doesn't know how to print a Unicode character, bypassing centralized error() handling.
Patch 0001 fixes it, but with that patch alone, character codes can no longer be printed in upper case (because GooString formatting doesn't implement uppercase hex digits). The output becomes:
    AnnotWidget::layoutText, cannot convert U+03c6
From a quick search, it seems that the U+ convention is to always print uppercase hex digits. Patch 0002 (to be applied on top of patch 0001) adds support for uppercase hexadecimal output (by adding a new 'X' suffix to the GooString formatting routines) and patches Annot::layoutText again to use the new suffix. This is the final output:
    AnnotWidget::layoutText, cannot convert U+03C6
To trigger the warning: pick any document with forms and enter an unusual character (e.g. www.irs.gov/pub/irs-pdf/fw4.pdf and φ, U+03C6).
Fabio
From c20fc3db4140ed3665da212a1a2dae2fdc9a809b Mon Sep 17 00:00:00 2001 From: Fabio D'Urso <[email protected]> Date: Wed, 9 May 2012 16:18:05 +0200 Subject: [PATCH 1/2] Use error() instead of fprintf(stderr, ...) in Annot::layoutText --- poppler/Annot.cc | 3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/poppler/Annot.cc b/poppler/Annot.cc index 24ef57d..95cee91 100644 --- a/poppler/Annot.cc +++ b/poppler/Annot.cc @@ -3891,8 +3891,7 @@ void Annot::layoutText(GooString *text, GooString *outBuf, int *i, } } else { ccToUnicode->decRefCnt(); - fprintf(stderr, - "warning: layoutText: cannot convert U+%04X\n", uChar); + error(errSyntaxError, -1, "AnnotWidget::layoutText, cannot convert U+{0:04ux}", uChar); } } -- 1.7.6.5
From d95b6833ca02278c01e2620478f8fca26dded41b Mon Sep 17 00:00:00 2001 From: Fabio D'Urso <[email protected]> Date: Wed, 9 May 2012 16:48:15 +0200 Subject: [PATCH 2/2] GooString formatting: add support for uppercase hexadecimal + Use it in Annot::layoutText --- goo/GooString.cc | 61 +++++++++++++++++++++++++++++++++++++++++++++-------- goo/GooString.h | 17 ++++++++------- poppler/Annot.cc | 2 +- 3 files changed, 61 insertions(+), 19 deletions(-) diff --git a/goo/GooString.cc b/goo/GooString.cc index fc78d90..1ebf341 100644 --- a/goo/GooString.cc +++ b/goo/GooString.cc @@ -20,6 +20,7 @@ // Copyright (C) 2007 Jeff Muizelaar <[email protected]> // Copyright (C) 2008-2011 Albert Astals Cid <[email protected]> // Copyright (C) 2011 Kenji Uno <[email protected]> +// Copyright (C) 2012 Fabio D'Urso <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -65,29 +66,35 @@ union GooStringFormatArg { enum GooStringFormatType { fmtIntDecimal, fmtIntHex, + fmtIntHexUpper, fmtIntOctal, fmtIntBinary, fmtUIntDecimal, fmtUIntHex, + fmtUIntHexUpper, fmtUIntOctal, fmtUIntBinary, fmtLongDecimal, fmtLongHex, + fmtLongHexUpper, fmtLongOctal, fmtLongBinary, fmtULongDecimal, fmtULongHex, + fmtULongHexUpper, fmtULongOctal, fmtULongBinary, #ifdef LLONG_MAX fmtLongLongDecimal, fmtLongLongHex, + fmtLongLongHexUpper, fmtLongLongOctal, fmtLongLongBinary, #endif #ifdef ULLONG_MAX fmtULongLongDecimal, fmtULongLongHex, + fmtULongLongHexUpper, fmtULongLongOctal, fmtULongLongBinary, #endif @@ -101,13 +108,13 @@ enum GooStringFormatType { }; static const char *formatStrings[] = { - "d", "x", "o", "b", "ud", "ux", "uo", "ub", - "ld", "lx", "lo", "lb", "uld", "ulx", "ulo", "ulb", + "d", "x", "X", "o", "b", "ud", "ux", "uX", "uo", "ub", + "ld", "lx", "lX", "lo", "lb", "uld", "ulx", "ulX", "ulo", "ulb", #ifdef LLONG_MAX - "lld", "llx", "llo", "llb", + "lld", "llx", "llX", "llo", 
"llb", #endif #ifdef ULLONG_MAX - "ulld", "ullx", "ullo", "ullb", + "ulld", "ullx", "ullX", "ullo", "ullb", #endif "f", "gs", "g", "c", @@ -388,6 +395,7 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { switch (ft) { case fmtIntDecimal: case fmtIntHex: + case fmtIntHexUpper: case fmtIntOctal: case fmtIntBinary: case fmtSpace: @@ -395,18 +403,21 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { break; case fmtUIntDecimal: case fmtUIntHex: + case fmtUIntHexUpper: case fmtUIntOctal: case fmtUIntBinary: args[argsLen].ui = va_arg(argList, Guint); break; case fmtLongDecimal: case fmtLongHex: + case fmtLongHexUpper: case fmtLongOctal: case fmtLongBinary: args[argsLen].l = va_arg(argList, long); break; case fmtULongDecimal: case fmtULongHex: + case fmtULongHexUpper: case fmtULongOctal: case fmtULongBinary: args[argsLen].ul = va_arg(argList, Gulong); @@ -414,6 +425,7 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { #ifdef LLONG_MAX case fmtLongLongDecimal: case fmtLongLongHex: + case fmtLongLongHexUpper: case fmtLongLongOctal: case fmtLongLongBinary: args[argsLen].ll = va_arg(argList, long long); @@ -422,6 +434,7 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { #ifdef ULLONG_MAX case fmtULongLongDecimal: case fmtULongLongHex: + case fmtULongLongHexUpper: case fmtULongLongOctal: case fmtULongLongBinary: args[argsLen].ull = va_arg(argList, unsigned long long); @@ -454,6 +467,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { case fmtIntHex: formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len); break; + case fmtIntHexUpper: + formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len, + gTrue); + break; case fmtIntOctal: formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 8, &str, &len); break; @@ -468,6 +485,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16, &str, &len); 
break; + case fmtUIntHexUpper: + formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16, + &str, &len, gTrue); + break; case fmtUIntOctal: formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 8, &str, &len); break; @@ -480,6 +501,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { case fmtLongHex: formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len); break; + case fmtLongHexUpper: + formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len, + gTrue); + break; case fmtLongOctal: formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 8, &str, &len); break; @@ -494,6 +519,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16, &str, &len); break; + case fmtULongHexUpper: + formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16, + &str, &len, gTrue); + break; case fmtULongOctal: formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 8, &str, &len); break; @@ -507,6 +536,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { case fmtLongLongHex: formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len); break; + case fmtLongLongHexUpper: + formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len, + gTrue); + break; case fmtLongLongOctal: formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 8, &str, &len); break; @@ -523,6 +556,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16, &str, &len); break; + case fmtULongLongHexUpper: + formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16, + &str, &len, gTrue); + break; case fmtULongLongOctal: formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 8, &str, &len); @@ -595,16 +632,20 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) { gfree(args); return this; } + +static const char lowerCaseDigits[17] = "0123456789abcdef"; +static const char upperCaseDigits[17] = 
"0123456789ABCDEF"; + #ifdef LLONG_MAX void GooString::formatInt(long long x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len) { + char **p, int *len, GBool upperCase) { #else void GooString::formatInt(long x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len) { + char **p, int *len, GBool upperCase) { #endif - static char vals[17] = "0123456789abcdef"; + const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits; GBool neg; int start, i, j; @@ -636,13 +677,13 @@ void GooString::formatInt(long x, char *buf, int bufSize, #ifdef ULLONG_MAX void GooString::formatUInt(unsigned long long x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len) { + char **p, int *len, GBool upperCase) { #else void GooString::formatUInt(Gulong x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len) { + char **p, int *len, GBool upperCase) { #endif - static char vals[17] = "0123456789abcdef"; + const char *vals = upperCase ? 
upperCaseDigits : lowerCaseDigits; int i, j; i = bufSize; diff --git a/goo/GooString.h b/goo/GooString.h index 23558b0..b24051b 100644 --- a/goo/GooString.h +++ b/goo/GooString.h @@ -18,6 +18,7 @@ // Copyright (C) 2006 Kristian Høgsberg <[email protected]> // Copyright (C) 2006 Krzysztof Kowalczyk <[email protected]> // Copyright (C) 2008-2010, 2012 Albert Astals Cid <[email protected]> +// Copyright (C) 2012 Fabio D'Urso <[email protected]> // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -80,10 +81,10 @@ public: // - <precision> is the number of digits to the right of the decimal // point (for floating point numbers) // - <type> is one of: - // d, x, o, b -- int in decimal, hex, octal, binary - // ud, ux, uo, ub -- unsigned int - // ld, lx, lo, lb, uld, ulx, ulo, ulb -- long, unsigned long - // lld, llx, llo, llb, ulld, ullx, ullo, ullb + // d, x, X, o, b -- int in decimal, lowercase hex, uppercase hex, octal, binary + // ud, ux, uX, uo, ub -- unsigned int + // ld, lx, lX, lo, lb, uld, ulx, ulX, ulo, ulb -- long, unsigned long + // lld, llx, llX, llo, llb, ulld, ullx, ullX, ullo, ullb // -- long long, unsigned long long // f, g -- double // c -- char @@ -170,20 +171,20 @@ private: #ifdef LLONG_MAX static void formatInt(long long x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len); + char **p, int *len, GBool upperCase = gFalse); #else static void formatInt(long x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len); + char **p, int *len, GBool upperCase = gFalse); #endif #ifdef ULLONG_MAX static void formatUInt(unsigned long long x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len); + char **p, int *len, GBool upperCase = gFalse); #else static void formatUInt(Gulong x, char *buf, int bufSize, GBool zeroFill, int width, int base, - char **p, int *len); + char 
**p, int *len, GBool upperCase = gFalse); #endif static void formatDouble(double x, char *buf, int bufSize, int prec, GBool trim, char **p, int *len); diff --git a/poppler/Annot.cc b/poppler/Annot.cc index 95cee91..8e1e760 100644 --- a/poppler/Annot.cc +++ b/poppler/Annot.cc @@ -3891,7 +3891,7 @@ void Annot::layoutText(GooString *text, GooString *outBuf, int *i, } } else { ccToUnicode->decRefCnt(); - error(errSyntaxError, -1, "AnnotWidget::layoutText, cannot convert U+{0:04ux}", uChar); + error(errSyntaxError, -1, "AnnotWidget::layoutText, cannot convert U+{0:04uX}", uChar); } } -- 1.7.6.5
_______________________________________________ poppler mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/poppler
