Currently, Annot::layoutText outputs a warning to stderr when it doesn't know 
how to print a Unicode character, bypassing centralized error() handling.

Patch 0001 fixes this, but with that patch alone the character code is no 
longer printed in upper case (because GooString formatting doesn't implement 
uppercase hex digits):
  AnnotWidget::layoutText, cannot convert U+03c6

A quick search suggests that the U+ convention is to always print 
uppercase hex digits.

Patch 0002 (to be applied on top of patch 0001) adds support for uppercase 
hexadecimal output (via a new 'X' suffix in the GooString formatting 
routines) and patches Annot::layoutText again to use the new suffix. This is 
the final output:
  AnnotWidget::layoutText, cannot convert U+03C6

To trigger the warning: pick any document with forms and add an unusual 
character (e.g. www.irs.gov/pub/irs-pdf/fw4.pdf and φ, U+03C6).

Fabio
From c20fc3db4140ed3665da212a1a2dae2fdc9a809b Mon Sep 17 00:00:00 2001
From: Fabio D'Urso <[email protected]>
Date: Wed, 9 May 2012 16:18:05 +0200
Subject: [PATCH 1/2] Use error() instead of fprintf(stderr, ...) in
 Annot::layoutText

---
 poppler/Annot.cc |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/poppler/Annot.cc b/poppler/Annot.cc
index 24ef57d..95cee91 100644
--- a/poppler/Annot.cc
+++ b/poppler/Annot.cc
@@ -3891,8 +3891,7 @@ void Annot::layoutText(GooString *text, GooString *outBuf, int *i,
         }
       } else {
         ccToUnicode->decRefCnt();
-        fprintf(stderr,
-                "warning: layoutText: cannot convert U+%04X\n", uChar);
+        error(errSyntaxError, -1, "AnnotWidget::layoutText, cannot convert U+{0:04ux}", uChar);
       }
     }
 
-- 
1.7.6.5

From d95b6833ca02278c01e2620478f8fca26dded41b Mon Sep 17 00:00:00 2001
From: Fabio D'Urso <[email protected]>
Date: Wed, 9 May 2012 16:48:15 +0200
Subject: [PATCH 2/2] GooString formatting: add support for uppercase
 hexadecimal + Use it in Annot::layoutText

---
 goo/GooString.cc |   61 +++++++++++++++++++++++++++++++++++++++++++++--------
 goo/GooString.h  |   17 ++++++++-------
 poppler/Annot.cc |    2 +-
 3 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/goo/GooString.cc b/goo/GooString.cc
index fc78d90..1ebf341 100644
--- a/goo/GooString.cc
+++ b/goo/GooString.cc
@@ -20,6 +20,7 @@
 // Copyright (C) 2007 Jeff Muizelaar <[email protected]>
 // Copyright (C) 2008-2011 Albert Astals Cid <[email protected]>
 // Copyright (C) 2011 Kenji Uno <[email protected]>
+// Copyright (C) 2012 Fabio D'Urso <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -65,29 +66,35 @@ union GooStringFormatArg {
 enum GooStringFormatType {
   fmtIntDecimal,
   fmtIntHex,
+  fmtIntHexUpper,
   fmtIntOctal,
   fmtIntBinary,
   fmtUIntDecimal,
   fmtUIntHex,
+  fmtUIntHexUpper,
   fmtUIntOctal,
   fmtUIntBinary,
   fmtLongDecimal,
   fmtLongHex,
+  fmtLongHexUpper,
   fmtLongOctal,
   fmtLongBinary,
   fmtULongDecimal,
   fmtULongHex,
+  fmtULongHexUpper,
   fmtULongOctal,
   fmtULongBinary,
 #ifdef LLONG_MAX
   fmtLongLongDecimal,
   fmtLongLongHex,
+  fmtLongLongHexUpper,
   fmtLongLongOctal,
   fmtLongLongBinary,
 #endif
 #ifdef ULLONG_MAX
   fmtULongLongDecimal,
   fmtULongLongHex,
+  fmtULongLongHexUpper,
   fmtULongLongOctal,
   fmtULongLongBinary,
 #endif
@@ -101,13 +108,13 @@ enum GooStringFormatType {
 };
 
 static const char *formatStrings[] = {
-  "d", "x", "o", "b", "ud", "ux", "uo", "ub",
-  "ld", "lx", "lo", "lb", "uld", "ulx", "ulo", "ulb",
+  "d", "x", "X", "o", "b", "ud", "ux", "uX", "uo", "ub",
+  "ld", "lx", "lX", "lo", "lb", "uld", "ulx", "ulX", "ulo", "ulb",
 #ifdef LLONG_MAX
-  "lld", "llx", "llo", "llb",
+  "lld", "llx", "llX", "llo", "llb",
 #endif
 #ifdef ULLONG_MAX
-  "ulld", "ullx", "ullo", "ullb",
+  "ulld", "ullx", "ullX", "ullo", "ullb",
 #endif
   "f", "gs", "g",
   "c",
@@ -388,6 +395,7 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	  switch (ft) {
 	  case fmtIntDecimal:
 	  case fmtIntHex:
+	  case fmtIntHexUpper:
 	  case fmtIntOctal:
 	  case fmtIntBinary:
 	  case fmtSpace:
@@ -395,18 +403,21 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	    break;
 	  case fmtUIntDecimal:
 	  case fmtUIntHex:
+	  case fmtUIntHexUpper:
 	  case fmtUIntOctal:
 	  case fmtUIntBinary:
 	    args[argsLen].ui = va_arg(argList, Guint);
 	    break;
 	  case fmtLongDecimal:
 	  case fmtLongHex:
+	  case fmtLongHexUpper:
 	  case fmtLongOctal:
 	  case fmtLongBinary:
 	    args[argsLen].l = va_arg(argList, long);
 	    break;
 	  case fmtULongDecimal:
 	  case fmtULongHex:
+	  case fmtULongHexUpper:
 	  case fmtULongOctal:
 	  case fmtULongBinary:
 	    args[argsLen].ul = va_arg(argList, Gulong);
@@ -414,6 +425,7 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 #ifdef LLONG_MAX
 	  case fmtLongLongDecimal:
 	  case fmtLongLongHex:
+	  case fmtLongLongHexUpper:
 	  case fmtLongLongOctal:
 	  case fmtLongLongBinary:
 	    args[argsLen].ll = va_arg(argList, long long);
@@ -422,6 +434,7 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 #ifdef ULLONG_MAX
 	  case fmtULongLongDecimal:
 	  case fmtULongLongHex:
+	  case fmtULongLongHexUpper:
 	  case fmtULongLongOctal:
 	  case fmtULongLongBinary:
 	    args[argsLen].ull = va_arg(argList, unsigned long long);
@@ -454,6 +467,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	case fmtIntHex:
 	  formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
 	  break;
+	case fmtIntHexUpper:
+	  formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 16, &str, &len,
+		    gTrue);
+	  break;
 	case fmtIntOctal:
 	  formatInt(arg.i, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
 	  break;
@@ -468,6 +485,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	  formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16,
 		     &str, &len);
 	  break;
+	case fmtUIntHexUpper:
+	  formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 16,
+		     &str, &len, gTrue);
+	  break;
 	case fmtUIntOctal:
 	  formatUInt(arg.ui, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
 	  break;
@@ -480,6 +501,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	case fmtLongHex:
 	  formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
 	  break;
+	case fmtLongHexUpper:
+	  formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 16, &str, &len,
+		    gTrue);
+	  break;
 	case fmtLongOctal:
 	  formatInt(arg.l, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
 	  break;
@@ -494,6 +519,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	  formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16,
 		     &str, &len);
 	  break;
+	case fmtULongHexUpper:
+	  formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 16,
+		     &str, &len, gTrue);
+	  break;
 	case fmtULongOctal:
 	  formatUInt(arg.ul, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
 	  break;
@@ -507,6 +536,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	case fmtLongLongHex:
 	  formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len);
 	  break;
+	case fmtLongLongHexUpper:
+	  formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 16, &str, &len,
+		    gTrue);
+	  break;
 	case fmtLongLongOctal:
 	  formatInt(arg.ll, buf, sizeof(buf), zeroFill, width, 8, &str, &len);
 	  break;
@@ -523,6 +556,10 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
 	  formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16,
 		     &str, &len);
 	  break;
+	case fmtULongLongHexUpper:
+	  formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 16,
+		     &str, &len, gTrue);
+	  break;
 	case fmtULongLongOctal:
 	  formatUInt(arg.ull, buf, sizeof(buf), zeroFill, width, 8,
 		     &str, &len);
@@ -595,16 +632,20 @@ GooString *GooString::appendfv(const char *fmt, va_list argList) {
   gfree(args);
   return this;
 }
+
+static const char lowerCaseDigits[17] = "0123456789abcdef";
+static const char upperCaseDigits[17] = "0123456789ABCDEF";
+
 #ifdef LLONG_MAX
 void GooString::formatInt(long long x, char *buf, int bufSize,
                           GBool zeroFill, int width, int base,
-                          char **p, int *len) {
+                          char **p, int *len, GBool upperCase) {
 #else
 void GooString::formatInt(long x, char *buf, int bufSize,
                           GBool zeroFill, int width, int base,
-                          char **p, int *len) {
+                          char **p, int *len, GBool upperCase) {
 #endif
-  static char vals[17] = "0123456789abcdef";
+  const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
   GBool neg;
   int start, i, j;
 
@@ -636,13 +677,13 @@ void GooString::formatInt(long x, char *buf, int bufSize,
 #ifdef ULLONG_MAX
 void GooString::formatUInt(unsigned long long x, char *buf, int bufSize,
                            GBool zeroFill, int width, int base,
-                           char **p, int *len) {
+                           char **p, int *len, GBool upperCase) {
 #else
 void GooString::formatUInt(Gulong x, char *buf, int bufSize,
                            GBool zeroFill, int width, int base,
-                           char **p, int *len) {
+                           char **p, int *len, GBool upperCase) {
 #endif
-  static char vals[17] = "0123456789abcdef";
+  const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
   int i, j;
 
   i = bufSize;
diff --git a/goo/GooString.h b/goo/GooString.h
index 23558b0..b24051b 100644
--- a/goo/GooString.h
+++ b/goo/GooString.h
@@ -18,6 +18,7 @@
 // Copyright (C) 2006 Kristian Høgsberg <[email protected]>
 // Copyright (C) 2006 Krzysztof Kowalczyk <[email protected]>
 // Copyright (C) 2008-2010, 2012 Albert Astals Cid <[email protected]>
+// Copyright (C) 2012 Fabio D'Urso <[email protected]>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -80,10 +81,10 @@ public:
   // - <precision> is the number of digits to the right of the decimal
   //   point (for floating point numbers)
   // - <type> is one of:
-  //     d, x, o, b -- int in decimal, hex, octal, binary
-  //     ud, ux, uo, ub -- unsigned int
-  //     ld, lx, lo, lb, uld, ulx, ulo, ulb -- long, unsigned long
-  //     lld, llx, llo, llb, ulld, ullx, ullo, ullb
+  //     d, x, X, o, b -- int in decimal, lowercase hex, uppercase hex, octal, binary
+  //     ud, ux, uX, uo, ub -- unsigned int
+  //     ld, lx, lX, lo, lb, uld, ulx, ulX, ulo, ulb -- long, unsigned long
+  //     lld, llx, llX, llo, llb, ulld, ullx, ullX, ullo, ullb
   //         -- long long, unsigned long long
   //     f, g -- double
   //     c -- char
@@ -170,20 +171,20 @@ private:
 #ifdef LLONG_MAX
   static void formatInt(long long x, char *buf, int bufSize,
 			GBool zeroFill, int width, int base,
-			char **p, int *len);
+			char **p, int *len, GBool upperCase = gFalse);
 #else
   static void formatInt(long x, char *buf, int bufSize,
 			GBool zeroFill, int width, int base,
-			char **p, int *len);
+			char **p, int *len, GBool upperCase = gFalse);
 #endif
 #ifdef ULLONG_MAX
   static void formatUInt(unsigned long long x, char *buf, int bufSize,
 			 GBool zeroFill, int width, int base,
-			 char **p, int *len);
+			 char **p, int *len, GBool upperCase = gFalse);
 #else
   static void formatUInt(Gulong x, char *buf, int bufSize,
 			 GBool zeroFill, int width, int base,
-			 char **p, int *len);
+			 char **p, int *len, GBool upperCase = gFalse);
 #endif
   static void formatDouble(double x, char *buf, int bufSize, int prec,
 			   GBool trim, char **p, int *len);
diff --git a/poppler/Annot.cc b/poppler/Annot.cc
index 95cee91..8e1e760 100644
--- a/poppler/Annot.cc
+++ b/poppler/Annot.cc
@@ -3891,7 +3891,7 @@ void Annot::layoutText(GooString *text, GooString *outBuf, int *i,
         }
       } else {
         ccToUnicode->decRefCnt();
-        error(errSyntaxError, -1, "AnnotWidget::layoutText, cannot convert U+{0:04ux}", uChar);
+        error(errSyntaxError, -1, "AnnotWidget::layoutText, cannot convert U+{0:04uX}", uChar);
       }
     }
 
-- 
1.7.6.5

_______________________________________________
poppler mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/poppler

Reply via email to