BASH PATCH REPORT
=================
Bash-Release: 5.3
Patch-ID: bash53-008
Bug-Reported-by: Grisha Levit <[email protected]>
Bug-Reference-ID: <[email protected]>
Bug-Reference-URL:
https://lists.gnu.org/archive/html/bug-bash/2025-10/msg00145.html
Bug-Description:
Bash tries to consume entire multibyte characters when looking for backslash
escapes in $'...' strings, and treats too many characters as potentially
beginning a multibyte character in UTF-8 locales. Being more selective about
when to call mbrtowc() can lead to optimized string processing and script
speedups. This patch also handles the unlikely situation of a locale
encoding null wide characters with non-null bytes.
Patch (apply with `patch -p0'):
*** ../bash-5.3-patched/lib/sh/strtrans.c Fri Oct 13 11:57:46 2023
--- lib/sh/strtrans.c Mon Oct 27 14:30:35 2025
***************
*** 56,60 ****
unsigned long v;
size_t clen;
! int mb_cur_max;
#if defined (HANDLE_MULTIBYTE)
wchar_t wc;
--- 56,60 ----
unsigned long v;
size_t clen;
! size_t mb_cur_max;
#if defined (HANDLE_MULTIBYTE)
wchar_t wc;
***************
*** 64,68 ****
return ((char *)0);
! mb_cur_max = MB_CUR_MAX;
#if defined (HANDLE_MULTIBYTE)
temp = 4*len + 4;
--- 64,68 ----
return ((char *)0);
! mb_cur_max = locale_mb_cur_max;
#if defined (HANDLE_MULTIBYTE)
temp = 4*len + 4;
***************
*** 80,87 ****
clen = 1;
#if defined (HANDLE_MULTIBYTE)
! if ((locale_utf8locale && (c & 0x80)) ||
! (locale_utf8locale == 0 && mb_cur_max > 0 && is_basic (c) == 0))
{
clen = mbrtowc (&wc, s - 1, mb_cur_max, 0);
if (MB_INVALIDCH (clen))
clen = 1;
--- 80,91 ----
clen = 1;
#if defined (HANDLE_MULTIBYTE)
! /* We read an entire multibyte character at a time if we are in a
! locale where a backslash can possibly appear as part of a
! multibyte character. UTF-8 encodings prohibit this. */
! if (locale_utf8locale == 0 && mb_cur_max > 1 && is_basic (c) == 0)
{
clen = mbrtowc (&wc, s - 1, mb_cur_max, 0);
+ if (MB_NULLWCH (clen))
+ break; /* it apparently can happen */
if (MB_INVALIDCH (clen))
clen = 1;
***************
*** 228,237 ****
char *r, *ret;
const char *s;
- size_t l, rsize;
unsigned char c;
size_t clen;
int b;
- #if defined (HANDLE_MULTIBYTE)
wchar_t wc;
#endif
--- 232,241 ----
char *r, *ret;
const char *s;
unsigned char c;
+ #if defined (HANDLE_MULTIBYTE)
size_t clen;
int b;
wchar_t wc;
+ DECLARE_MBSTATE;
#endif
***************
*** 239,245 ****
return ((char *)0);
! l = strlen (str);
! rsize = 4 * l + 4;
! r = ret = (char *)xmalloc (rsize);
*r++ = '$';
--- 243,247 ----
return ((char *)0);
! r = ret = (char *)xmalloc (4 * strlen (str) + 4);
*r++ = '$';
***************
*** 248,255 ****
for (s = str; c = *s; s++)
{
- b = 1; /* 1 == add backslash; 0 == no backslash */
- l = 1;
- clen = 1;
-
switch (c)
{
--- 250,253 ----
***************
*** 267,303 ****
default:
#if defined (HANDLE_MULTIBYTE)
! b = is_basic (c);
! /* XXX - clen comparison to 0 is dicey */
! if ((b == 0 && ((clen = mbrtowc (&wc, s, MB_CUR_MAX, 0)) < 0 || MB_INVALIDCH (clen) || iswprint (wc) == 0)) ||
! (b == 1 && ISPRINT (c) == 0))
! #else
! if (ISPRINT (c) == 0)
! #endif
{
! *r++ = '\\';
! *r++ = TOCHAR ((c >> 6) & 07);
! *r++ = TOCHAR ((c >> 3) & 07);
! *r++ = TOCHAR (c & 07);
! continue;
}
! l = 0;
! break;
! }
! if (b == 0 && clen == 0)
! break;
! if (l)
! *r++ = '\\';
!
! if (clen == 1)
! *r++ = c;
! else
! {
! for (b = 0; b < (int)clen; b++)
! *r++ = (unsigned char)s[b];
! s += clen - 1; /* -1 because of the increment above */
}
}
*r++ = '\'';
*r = '\0';
--- 265,304 ----
default:
#if defined (HANDLE_MULTIBYTE)
! if ((locale_utf8locale && (c & 0x80)) ||
! (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0))
{
! clen = mbrtowc (&wc, s, locale_mb_cur_max, &state);
! if (MB_NULLWCH (clen))
! goto quote_end;
! if (MB_INVALIDCH (clen))
! INITIALIZE_MBSTATE;
! else if (iswprint (wc))
! {
! for (b = 0; b < (int)clen; b++)
! *r++ = (unsigned char)s[b];
! s += clen - 1; /* -1 because of the increment above */
! continue;
! }
}
! else
! #endif
! if (ISPRINT (c))
! {
! *r++ = c;
! continue;
! }
! *r++ = '\\';
! *r++ = TOCHAR ((c >> 6) & 07);
! *r++ = TOCHAR ((c >> 3) & 07);
! *r++ = TOCHAR (c & 07);
! continue;
}
+
+ *r++ = '\\';
+ *r++ = c;
}
+ quote_end:
*r++ = '\'';
*r = '\0';
***************
*** 349,353 ****
{
#if defined (HANDLE_MULTIBYTE)
! if (is_basic (c) == 0)
return (ansic_wshouldquote (s));
#endif
--- 350,355 ----
{
#if defined (HANDLE_MULTIBYTE)
! if ((locale_utf8locale && (c & 0x80)) ||
! (locale_utf8locale == 0 && locale_mb_cur_max > 1 && is_basic (c) == 0))
return (ansic_wshouldquote (s));
#endif
*** ../bash-5.3/patchlevel.h 2020-06-22 14:51:03.000000000 -0400
--- patchlevel.h 2020-10-01 11:01:28.000000000 -0400
***************
*** 26,30 ****
looks for to find the patch level (for the sccs version string). */
! #define PATCHLEVEL 7
#endif /* _PATCHLEVEL_H_ */
--- 26,30 ----
looks for to find the patch level (for the sccs version string). */
! #define PATCHLEVEL 8
#endif /* _PATCHLEVEL_H_ */
--
``The lyf so short, the craft so long to lerne.'' - Chaucer
``Ars longa, vita brevis'' - Hippocrates
Chet Ramey, UTech, CWRU [email protected] http://tiswww.cwru.edu/~chet/