From: amritxyz <[email protected]>

Raw C1 bytes are not valid UTF-8. utf8decode() returns U+FFFD for
them, which gets drawn on screen as a replacement character.

Fix this by skipping C1 bytes in twrite() before utf8decode() sees
them. The ESC_STR guard lets them through when inside a STR sequence
so they can still act as sequence terminators.

Also add an early return in tputc() as a safety net for any direct
callers, and call strhandle() when a C1 byte terminates a STR
sequence so OSC sequences are not silently lost.

Tested: printf '\x8f' now produces no output.
---
 st.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/st.c b/st.c
index 6f40e35..d0bf933 100644
--- a/st.c
+++ b/st.c
@@ -2396,6 +2396,9 @@ tputc(Rune u)
        Glyph *gp;
 
        control = ISCONTROL(u);
+       /* in UTF-8 mode, ignore C1 control characters early */
+       if (IS_SET(MODE_UTF8) && ISCONTROLC1(u) && !(term.esc & ESC_STR))
+               return;
        if (u < 127 || !IS_SET(MODE_UTF8)) {
                c[0] = u;
                width = len = 1;
@@ -2455,8 +2458,11 @@ check_control_code:
         */
        if (control) {
                /* in UTF-8 mode ignore handling C1 control characters */
-               if (IS_SET(MODE_UTF8) && ISCONTROLC1(u))
+               if (IS_SET(MODE_UTF8) && ISCONTROLC1(u)) {
+                       if (term.esc & ESC_STR_END)
+                               strhandle();
                        return;
+               }
                tcontrolcode(u);
                /*
                 * control codes are not shown ever
@@ -2546,6 +2552,11 @@ twrite(const char *buf, int buflen, int show_ctrl)
 
        for (n = 0; n < buflen; n += charsize) {
                if (IS_SET(MODE_UTF8)) {
+                       /* skip C1 bytes before utf8decode() mangles them */
+                       if (ISCONTROLC1(buf[n] & 0xFF) && !(term.esc & ESC_STR)) {
+                               charsize = 1;
+                               continue;
+                       }
                        /* process a complete utf8 char */
                        charsize = utf8decode(buf + n, &u, buflen - n);
                        if (charsize == 0)
-- 
2.53.0


Reply via email to