On Wed, 2021-04-14 at 20:10 +0300, Lauri Tirkkonen wrote:
> Since the discussion seems to have died out, I take my patch will not be
> accepted.
> 
> The decision appears to be that OpenBSD is right and everyone else is wrong in
> this matter. Given that, and the calls to change the behavior of other OSes 
> and
> terminal emulators around SHY: are you going to at least patch xterm in-tree 
> so
> that it does not render SHY?
> 
> Or must it remain broken?
> 
Looking closer at the xterm source corroborated my previous reasoning.
>From xterm's wcwidth.c:
/*
 * Provide a way to change the behavior of soft-hyphen.
 */
void mk_wcwidth_init(int mode)
{
  use_latin1 = (mode == 0);
}

and

 *    - SOFT HYPHEN (U+00AD) has a column width of 1 in Latin-1, 0 in Unicode.
 *      An initialization function is used to switch between the two.

So it is the intention of xterm to not display the soft hyphen in
unicode mode.

This is also corrobarated by charproc.c5799 where the error occurs:
                        if (ch == 0xad) {                            
                            /*                                                  
                                        
                             * Only display soft-hyphen if it happens to be at
                             * the right-margin.  While that means that only
                             * the displayed character could be selected for
                             * pasting, a well-behaved application would never
                             * send this, anyway...
                             */

The problem here is that on line 5795 we have:
last_chomp = CharWidth(buf[n]);
which expands to:
CharWidth(n) (((n) < 256) ? (IsLatin1(n) ? 1 : 0) : my_wcwidth((wchar_t) (n)))
and
#define IsLatin1(n)  (((n) >= 32 && (n) <= 126) || ((n) >= 160 && (n) <= 255))

So here's the big oops: CharWidth doesn't know we're in UTF-8 mode and
we never reach my_wcwidth.

Diff below fixes this behaviour for me and restores the printing
behaviour when I run xterm with +u8 to reset utf-8 mode.
However, I'm no xterm hacker and it's quite a beast, so this needs
proper testing and scrutiny from someone who knows the code to make
sure there's no use of uninitialized variables. (CC matthieu@)

No intention of pushing this for 6.9, but maybe someone brave is
willing to dive in here after me.

martijn@

Index: charproc.c
===================================================================
RCS file: /cvs/xenocara/app/xterm/charproc.c,v
retrieving revision 1.49
diff -u -p -r1.49 charproc.c
--- charproc.c  2 Apr 2021 18:44:19 -0000       1.49
+++ charproc.c  14 Apr 2021 19:24:14 -0000
@@ -2305,7 +2305,7 @@ doparsing(XtermWidget xw, unsigned c, st
         */
        if (c >= 0x300
            && screen->wide_chars
-           && CharWidth(c) == 0
+           && CharWidth(screen, c) == 0
            && !isWideControl(c)) {
            int prev, test;
            Boolean used = True;
@@ -2330,9 +2330,9 @@ doparsing(XtermWidget xw, unsigned c, st
                prev = (int) XTERM_CELL(use_row, use_col);
                test = do_precomposition(prev, (int) c);
                TRACE(("do_precomposition (U+%04X [%d], U+%04X [%d]) -> U+%04X 
[%d]\n",
-                      prev, CharWidth(prev),
-                      (int) c, CharWidth(c),
-                      test, CharWidth(test)));
+                      prev, CharWidth(screen, prev),
+                      (int) c, CharWidth(screen, c),
+                      test, CharWidth(screen, test)));
            } else {
                prev = -1;
                test = -1;
@@ -2342,7 +2342,7 @@ doparsing(XtermWidget xw, unsigned c, st
             * only if it does not change the width of the base character
             */
            if (test != -1
-               && CharWidth(test) == CharWidth(prev)) {
+               && CharWidth(screen, test) == CharWidth(screen, prev)) {
                putXtermCell(screen, use_row, use_col, test);
            } else if (screen->char_was_written
                       || getXtermCell(screen, use_row, use_col) >= ' ') {
@@ -4551,7 +4551,7 @@ doparsing(XtermWidget xw, unsigned c, st
                value = zero_if_default(0);
 
                TRACE(("CASE_DECFRA - Fill rectangular area\n"));
-               if (nparam > 0 && CharWidth(value) > 0) {
+               if (nparam > 0 && CharWidth(screen, value) > 0) {
                    xtermParseRect(xw, ParamPair(1), &myRect);
                    ScrnFillRectangle(xw, &myRect, value, xw->flags, True);
                }
@@ -4860,7 +4860,7 @@ doparsing(XtermWidget xw, unsigned c, st
 
        case CASE_REP:
            TRACE(("CASE_REP\n"));
-           if (CharWidth(sp->lastchar) > 0) {
+           if (CharWidth(screen, sp->lastchar) > 0) {
                IChar repeated[2];
                count = one_if_default(0);
                repeated[0] = (IChar) sp->lastchar;
@@ -5792,7 +5792,7 @@ dotext(XtermWidget xw,
                           buf[n] <= 0xa0) {
                    last_chomp = 1;
                } else {
-                   last_chomp = CharWidth(buf[n]);
+                   last_chomp = CharWidth(screen, buf[n]);
                    if (last_chomp <= 0) {
                        IChar ch = buf[n];
                        Bool eat_it = !screen->utf8_mode && (ch > 127);
Index: fontutils.c
===================================================================
RCS file: /cvs/xenocara/app/xterm/fontutils.c,v
retrieving revision 1.37
diff -u -p -r1.37 fontutils.c
--- fontutils.c 2 Apr 2021 18:44:19 -0000       1.37
+++ fontutils.c 14 Apr 2021 19:24:14 -0000
@@ -2442,7 +2442,7 @@ dumpXft(XtermWidget xw, XTermXftFonts *d
 #endif
     for (c = first; c <= last; ++c) {
        if (FcCharSetHasChar(xft->charset, c)) {
-           int width = CharWidth(c);
+           int width = CharWidth(screen, c);
            XGlyphInfo extents;
            Boolean big_x;
            Boolean big_y;
@@ -2610,7 +2610,7 @@ checkXftWidth(XtermWidget xw, XTermXftFo
      * Ignore control characters - their extent information is misleading.
      */
     for (c = 32; c < 256; ++c) {
-       if (CharWidth(c) <= 0)
+       if (CharWidth(TScreenOf(xw), c) <= 0)
            continue;
        if (FcCharSetHasChar(source->font->charset, c)) {
            (void) checkedXftWidth(XtDisplay(xw),
@@ -3626,8 +3626,7 @@ xtermMissingChar(unsigned ch, XTermFonts
 #endif
 
     if (pc == 0 || CI_NONEXISTCHAR(pc)) {
-       TRACE2(("xtermMissingChar %#04x (!exists), %d cells\n",
-               ch, CharWidth(ch)));
+       TRACE2(("xtermMissingChar %#04x (!exists)\n", ch));
        result = True;
     }
     if (ch < KNOWN_MISSING) {
@@ -4054,7 +4053,7 @@ foundXftGlyph(XtermWidget xw, XftFont *f
     if (font != 0 && XftGlyphExists(screen->display, font, wc)) {
        int expect;
 
-       if ((expect = CharWidth(wc)) > 0) {
+       if ((expect = CharWidth(screen, wc)) > 0) {
            XGlyphInfo gi;
            int actual;
 
Index: util.c
===================================================================
RCS file: /cvs/xenocara/app/xterm/util.c,v
retrieving revision 1.39
diff -u -p -r1.39 util.c
--- util.c      2 Apr 2021 18:44:19 -0000       1.39
+++ util.c      14 Apr 2021 19:24:14 -0000
@@ -2937,14 +2937,14 @@ getXftColor(XtermWidget xw, Pixel pixel)
              ? (((ch) >= 128 && (ch) < 160) \
                  ? (TScreenOf(xw)->c1_printable ? 1 : 0) \
                  : 1) \
-             : CharWidth(ch)))
+             : CharWidth(TScreenOf(xw), ch)))
 #else
 #define XtermCellWidth(xw, ch) \
        (((ch) == 0 || (ch) == 127) \
          ? 0 \
          : (((ch) < 256) \
              ? 1 \
-             : CharWidth(ch)))
+             : CharWidth(TScreenOf(xw), ch)))
 #endif
 
 #endif /* OPT_RENDERWIDE */
@@ -3247,7 +3247,7 @@ ucs_workaround(XTermDraw * params,
        IChar eqv = (IChar) AsciiEquivs(ch);
 
        if (eqv != (IChar) ch) {
-           int width = CharWidth(ch);
+           int width = CharWidth(screen, ch);
 
            do {
                drawXtermText(params,
@@ -3939,7 +3939,7 @@ drawXtermText(XTermDraw * params,
                unsigned ch = (unsigned) text[last];
                int filler = 0;
 #if OPT_WIDE_CHARS
-               int needed = forceDbl ? 2 : CharWidth(ch);
+               int needed = forceDbl ? 2 : CharWidth(screen, ch);
                XftFont *currFont = pickXftFont(needed, font, wfont);
                XftFont *tempFont = 0;
 #define CURR_TEMP (tempFont ? tempFont : currFont)
@@ -4210,7 +4210,7 @@ drawXtermText(XTermDraw * params,
                drewBoxes = True;
                continue;
            }
-           ch_width = CharWidth(ch);
+           ch_width = CharWidth(screen, ch);
            isMissing =
                IsXtermMissingChar(screen, ch,
                                   ((recur.on_wide || ch_width > 1)
@@ -4335,7 +4335,7 @@ drawXtermText(XTermDraw * params,
 
            if (!needWide
                && !IsIcon(screen)
-               && ((recur.on_wide || CharWidth(ch) > 1)
+               && ((recur.on_wide || CharWidth(screen, ch) > 1)
                    && okFont(NormalWFont(screen)))) {
                needWide = True;
            }
@@ -5059,7 +5059,7 @@ addXtermCombining(TScreen *screen, int r
        size_t off;
 
        TRACE(("addXtermCombining %d,%d U+%04X (%d)\n",
-              row, col, ch, CharWidth(ch)));
+              row, col, ch, CharWidth(screen, ch)));
 
        for_each_combData(off, ld) {
            if (!ld->combData[off][col]) {
Index: xterm.h
===================================================================
RCS file: /cvs/xenocara/app/xterm/xterm.h,v
retrieving revision 1.47
diff -u -p -r1.47 xterm.h
--- xterm.h     2 Apr 2021 18:44:19 -0000       1.47
+++ xterm.h     14 Apr 2021 19:24:14 -0000
@@ -963,10 +963,10 @@ extern void report_char_class(XtermWidge
 #define WideCells(n) (((IChar)(n) >= first_widechar) ? my_wcwidth((wchar_t) 
(n)) : 1)
 #define isWideFrg(n) (((n) == HIDDEN_CHAR) || (WideCells((n)) == 2))
 #define isWide(n)    (((IChar)(n) >= first_widechar) && isWideFrg(n))
-#define CharWidth(n) (((n) < 256) ? (IsLatin1(n) ? 1 : 0) : 
my_wcwidth((wchar_t) (n)))
+#define CharWidth(screen, n) ((!(screen->utf8_mode) && ((n) < 256)) ? 
(IsLatin1(n) ? 1 : 0) : my_wcwidth((wchar_t) (n)))
 #else
 #define WideCells(n) 1
-#define CharWidth(n) (IsLatin1(n) ? 1 : 0)
+#define CharWidth(screen, n) (IsLatin1(n) ? 1 : 0)
 #endif
 
 /* cachedCgs.c */


Reply via email to