tags 737989 +patch
thanks

When using textEncoding UTF-8, which may now be the default in Debian,
the X text selection is offered as type STRING but contains UTF-8 bytes.
I expected a STRING-type selection to contain Latin-1 bytes.

I suggest the change below, which lets Xutf8TextListToTextProperty()
produce suitable bytes for the selection when running with -enc UTF-8.
With it, the selection is offered as all of the targets STRING,
COMPOUND_TEXT, TEXT and UTF8_STRING.

Offering all these types gives maximum interoperability with other X
programs.  Newer programs may prefer UTF8_STRING, but traditional
programs may only know COMPOUND_TEXT, and simplistic programs might only
accept STRING.


--- XPDFCore.h.orig	2014-07-31 18:46:16.618036262 +1000
+++ XPDFCore.h	2014-07-31 09:44:06.534977028 +1000
@@ -223,6 +223,9 @@
   static GString *currentSelection;  // selected text
   static XPDFCore *currentSelectionOwner;
   static Atom targetsAtom;
+  static Atom textAtom;
+  static Atom compoundtextAtom;
+  static Atom utf8stringAtom;
 
   GBool panning;
   int panMX, panMY;
--- XPDFCore.cc.orig	2014-07-31 18:46:16.614036262 +1000
+++ XPDFCore.cc	2014-07-31 18:45:25.522038386 +1000
@@ -57,6 +57,9 @@
 GString *XPDFCore::currentSelection = NULL;
 XPDFCore *XPDFCore::currentSelectionOwner = NULL;
 Atom XPDFCore::targetsAtom;
+Atom XPDFCore::textAtom;
+Atom XPDFCore::compoundtextAtom;
+Atom XPDFCore::utf8stringAtom;
 
 //------------------------------------------------------------------------
 // XPDFCoreTile
@@ -100,6 +103,9 @@
   display = XtDisplay(parentWidget);
   screenNum = XScreenNumberOfScreen(XtScreen(parentWidget));
   targetsAtom = XInternAtom(display, "TARGETS", False);
+  textAtom         = XInternAtom(display, "TEXT",          False);
+  compoundtextAtom = XInternAtom(display, "COMPOUND_TEXT", False);
+  utf8stringAtom   = XInternAtom(display, "UTF8_STRING",   False);
 
   paperPixel = paperPixelA;
   mattePixel = mattePixelA;
@@ -443,7 +449,7 @@
 
   // send back a list of supported conversion targets
   if (*target == targetsAtom) {
-    if (!(array = (Atom *)XtMalloc(sizeof(Atom)))) {
+    if (!(array = (Atom *)XtMalloc(4 * sizeof(Atom)))) {
       return False;
     }
     array[0] = XA_STRING;
@@ -451,6 +457,54 @@
     *type = XA_ATOM;
     *format = 32;
     *length = 1;
+
+    if (!globalParams->getTextEncodingName()->cmp("UTF-8")) {
+      array[1] = textAtom;
+      array[2] = compoundtextAtom;
+      array[3] = utf8stringAtom;
+      *length = 4;
+    }
+    return True;
+
+    // ENHANCE-ME: If currentSelection could be made always UTF-8 then we
+    // could use this UTF-8 code always, not just when the user chooses
+    // UTF-8 in textEncoding / -enc.  Can TextOutputDev be asked nicely to
+    // give us UTF-8 in copySelection()?
+    //
+  } else if (!globalParams->getTextEncodingName()->cmp("UTF-8")
+             && (*target == XA_STRING
+                 || *target == textAtom
+                 || *target == compoundtextAtom)) {
+    char *str = currentSelection->getCString();
+    XICCEncodingStyle style
+      = (*target == XA_STRING  ? XStringStyle
+         : *target == textAtom ? XStdICCTextStyle
+         :                       XCompoundTextStyle);
+    XTextProperty t;
+    int ret = Xutf8TextListToTextProperty(XtDisplay(widget),
+                                          &str,1, style, &t);
+    if (ret < 0) {
+      error(errInternal, -1 , "cannot form text property, error {0:d}", ret);
+      return False;
+    }
+    *value = t.value;
+    *type = t.encoding;
+    *format = t.format;
+    *length = t.nitems;
+    return True;
+
+    // UTF8_STRING case could be handled by Xutf8TextListToTextProperty()
+    // above with XUTF8StringStyle if desired.  But there's no conversion in
+    // that case and XUTF8StringStyle is an XFree86 extension which might
+    // not be present in older Xlib.  A plain direct send lets us support
+    // UTF8_STRING always.
+    //
+  } else if (!globalParams->getTextEncodingName()->cmp("UTF-8")
+             && *target == utf8stringAtom) {
+    *value = XtNewString(currentSelection->getCString());
+    *length = strlen((char*) *value);
+    *type = utf8stringAtom;
+    *format = 8; // 8-bit elements
     return True;
 
   // send the selected text

Reply via email to