tags 737989 +patch
thanks
When using textEncoding UTF-8, which may now be the default in Debian,
the X text selection is offered as type STRING but contains UTF-8 bytes.
I expected a STRING selection to contain Latin-1 bytes.
I suggest the change below, which lets Xutf8TextListToTextProperty() form
suitable bytes for the selection when running with -enc UTF-8. The
selection is then offered in all of the data types STRING, COMPOUND_TEXT,
TEXT and UTF8_STRING.
Offering all these types gives maximum interoperability with other X
programs. Sufficiently new programs can use UTF8_STRING, but traditional
programs may only know COMPOUND_TEXT, and simplistic programs might only
accept STRING.
--- XPDFCore.h.orig 2014-07-31 18:46:16.618036262 +1000
+++ XPDFCore.h 2014-07-31 09:44:06.534977028 +1000
@@ -223,6 +223,9 @@
static GString *currentSelection; // selected text
static XPDFCore *currentSelectionOwner;
static Atom targetsAtom;
+ static Atom textAtom;
+ static Atom compoundtextAtom;
+ static Atom utf8stringAtom;
GBool panning;
int panMX, panMY;
--- XPDFCore.cc.orig 2014-07-31 18:46:16.614036262 +1000
+++ XPDFCore.cc 2014-07-31 18:45:25.522038386 +1000
@@ -57,6 +57,9 @@
GString *XPDFCore::currentSelection = NULL;
XPDFCore *XPDFCore::currentSelectionOwner = NULL;
Atom XPDFCore::targetsAtom;
+Atom XPDFCore::textAtom;
+Atom XPDFCore::compoundtextAtom;
+Atom XPDFCore::utf8stringAtom;
//------------------------------------------------------------------------
// XPDFCoreTile
@@ -100,6 +103,9 @@
display = XtDisplay(parentWidget);
screenNum = XScreenNumberOfScreen(XtScreen(parentWidget));
targetsAtom = XInternAtom(display, "TARGETS", False);
+ textAtom = XInternAtom(display, "TEXT", False);
+ compoundtextAtom = XInternAtom(display, "COMPOUND_TEXT", False);
+ utf8stringAtom = XInternAtom(display, "UTF8_STRING", False);
paperPixel = paperPixelA;
mattePixel = mattePixelA;
@@ -443,7 +449,7 @@
// send back a list of supported conversion targets
if (*target == targetsAtom) {
- if (!(array = (Atom *)XtMalloc(sizeof(Atom)))) {
+ if (!(array = (Atom *)XtMalloc(4 * sizeof(Atom)))) {
return False;
}
array[0] = XA_STRING;
@@ -451,6 +457,54 @@
*type = XA_ATOM;
*format = 32;
*length = 1;
+
+ if (!globalParams->getTextEncodingName()->cmp("UTF-8")) {
+ array[1] = textAtom;
+ array[2] = compoundtextAtom;
+ array[3] = utf8stringAtom;
+ *length = 4;
+ }
+ return True;
+
+ // ENHANCE-ME: If currentSelection could be made always UTF-8 then we
+ // could use this UTF-8 code always, not just when the user chooses
+ // UTF-8 in textEncoding / -enc. Can TextOutputDev be asked nicely to
+ // give us UTF-8 in copySelection()?
+ //
+ } else if (!globalParams->getTextEncodingName()->cmp("UTF-8")
+ && (*target == XA_STRING
+ || *target == textAtom
+ || *target == compoundtextAtom)) {
+ char *str = currentSelection->getCString();
+ XICCEncodingStyle style
+ = (*target == XA_STRING ? XStringStyle
+ : *target == textAtom ? XStdICCTextStyle
+ : XCompoundTextStyle);
+ XTextProperty t;
+ int ret = Xutf8TextListToTextProperty(XtDisplay(widget),
+ &str,1, style, &t);
+ if (ret < 0) {
+ error(errInternal, -1 , "cannot form text property, error {0:d}", ret);
+ return False;
+ }
+ *value = t.value;
+ *type = t.encoding;
+ *format = t.format;
+ *length = t.nitems;
+ return True;
+
+ // UTF8_STRING case could be handled by Xutf8TextListToTextProperty()
+ // above with XUTF8StringStyle if desired. But there's no conversion in
+ // that case and XUTF8StringStyle is an XFree86 extension which might
+ // not be present in older Xlib. A plain direct send lets us support
+ // UTF8_STRING always.
+ //
+ } else if (!globalParams->getTextEncodingName()->cmp("UTF-8")
+ && *target == utf8stringAtom) {
+ *value = XtNewString(currentSelection->getCString());
+ *length = strlen((char*) *value);
+ *type = utf8stringAtom;
+ *format = 8; // 8-bit elements
return True;
// send the selected text