Package: release.debian.org
Severity: normal
Tags: bullseye
User: release.debian....@packages.debian.org
Usertags: pu
X-Debbugs-Cc: gl...@packages.debian.org, debian-b...@lists.debian.org, 
debian-gl...@lists.debian.org
Control: affects -1 + src:glibc

[ Reason ]
There are multiple fixes in this upload, all coming from the upstream
stable branch:
- Multiple crashes and a memory leak in printf-family functions
- Overflow fix in the AVX2 implementation of wcsnlen

[ Impact ]
In case the update isn't approved, systems will be left with issues
which combined with other vulnerabilities might lead to denial of
service.

[ Tests ]
The upstream fixes come with additional tests, which represent a
significant part of the diff.

[ Risks ]
The riskiest parts are probably the changes to the printf-family
functions. However, those changes have been in testing/sid for ~1.5 years
(since glibc 2.32); the issues they fix have only recently been
identified as problematic. The wcsnlen fix has been in testing/sid for
~4 months. All of those changes come with additional tests.

[ Checklist ]
  [x] *all* changes are documented in the d/changelog
  [x] I reviewed all changes and I approve them
  [x] attach debdiff against the package in (old)stable
  [x] the issue is verified as fixed in unstable

[ Changes ]
Let me comment on the changelog:

 - Drop debian/patches/amd64/local-require-bmi-in-avx2-ifunc.diff
   (obsolete).

The upstream stable branch for glibc 2.31 now includes the change that
was introduced in glibc 2.31-13+deb11u5 to fix a crash on some CPUs.
Therefore this patch is no longer needed.

 - Fix memory leak in printf-family functions with long multibyte strings.

   This fixes a memory leak that might lead to OOM when these functions
   are called with long multibyte strings. The simplest reproducer is:
     printf("%.1371337ls", L"A\n");

 - Fix a crash in printf-family due to width/precision-dependent
   allocations.

   This fixes a crash due to a missing overflow check in the requested
   precision. The simplest reproducer is:
     fprintf (fp, "%2$.*1$a", 0x7fffffff, 1e200);

 - Fix a segfault in printf handling thousands separator.

   This segmentation fault has been fixed as a side effect of the
   previous fix, but comes with a specific test. The simplest reproducer
   is:
     setlocale(LC_ALL, "en_US.UTF-8");
     printf("%'1000d\n", 1000);

 - Fix an overflow in the AVX2 implementation of wcsnlen when crossing
   pages.

   The overflow happens when wcsnlen is called with a huge maxlen
   argument (e.g. (1UL << 63)), triggering an assertion in the wcsnlen
   code.
diff --git a/debian/changelog b/debian/changelog
index 50f6135b..3d95edf8 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,18 @@
+glibc (2.31-13+deb11u6) UNRELEASED; urgency=medium
+
+  [ Aurelien Jarno ]
+  * debian/patches/git-updates.diff: update from upstream stable branch:
+    - Drop debian/patches/amd64/local-require-bmi-in-avx2-ifunc.diff
+      (obsolete).
+    - Fix memory leak in printf-family functions with long multibyte strings.
+    - Fix a crash in printf-family due to width/precision-dependent
+      allocations.
+    - Fix a segfault in printf handling thousands separator.
+    - Fix an overflow in the AVX2 implementation of wcsnlen when crossing
+      pages.
+
+ -- Aurelien Jarno <aure...@debian.org>  Sun, 16 Apr 2023 18:58:33 +0200
+
 glibc (2.31-13+deb11u5) bullseye; urgency=medium
 
   * debian/patches/local-require-bmi-in-avx2-ifunc.diff: new patch extracted
diff --git a/debian/patches/amd64/local-require-bmi-in-avx2-ifunc.diff b/debian/patches/amd64/local-require-bmi-in-avx2-ifunc.diff
deleted file mode 100644
index 936f89ae..00000000
--- a/debian/patches/amd64/local-require-bmi-in-avx2-ifunc.diff
+++ /dev/null
@@ -1,38 +0,0 @@
-This patch is extracted from upstream commit 83c5b368226c ("x86-64: Require
-BMI2 for strchr-avx2.S"). It changes the common ifunc AVX2 selector to require
-the BMI2 instructions, and the backported fixes for memchr and strlen rely on
-that change.
-
---- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
-+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
-@@ -21,28 +21,28 @@ IFUNC_SELECTOR (void)
- 
- extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
- extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
- extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
- extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
- 
- static inline void *
- IFUNC_SELECTOR (void)
- {
-   const struct cpu_features* cpu_features = __get_cpu_features ();
- 
-   if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
-+      && CPU_FEATURES_CPU_P (cpu_features, BMI2)
-       && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
-     {
-       if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
--        && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)
--        && CPU_FEATURES_CPU_P (cpu_features, BMI2))
-+        && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
-       return OPTIMIZE (evex);
- 
-       if (CPU_FEATURES_CPU_P (cpu_features, RTM))
-       return OPTIMIZE (avx2_rtm);
- 
-       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
-       return OPTIMIZE (avx2);
-     }
- 
-   return OPTIMIZE (sse2);
- }
diff --git a/debian/patches/git-updates.diff b/debian/patches/git-updates.diff
index e4bcb9ee..63246ab1 100644
--- a/debian/patches/git-updates.diff
+++ b/debian/patches/git-updates.diff
@@ -23,16 +23,17 @@ index 242cb06f91..b487e18634 100644
  '--disable-werror'
       By default, the GNU C Library is built with '-Werror'.  If you wish
 diff --git a/NEWS b/NEWS
-index 292fbc595a..a3278be684 100644
+index 292fbc595a..8a20d3c4e3 100644
 --- a/NEWS
 +++ b/NEWS
-@@ -5,6 +5,90 @@ See the end for copying conditions.
+@@ -5,6 +5,94 @@ See the end for copying conditions.
  Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
  using `glibc' in the "product" field.
  
 +Version 2.31.1
 +
 +The following bugs are resolved with this release:
++  [14231] stdio-common tests memory requirements
 +  [19519] iconv(1) with -c option hangs on illegal multi-byte sequences
 +    (CVE-2016-10228)
 +  [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT
@@ -48,6 +49,7 @@ index 292fbc595a..a3278be684 100644
 +  [25635] arm: Wrong sysdep order selection for soft-fp
 +  [25639] localedata: Some names of days and months wrongly spelt in
 +    Occitan
++  [25691] stdio: Remove memory leak from multibyte convertion
 +  [25715] system() returns wrong errors when posix_spawn fails
 +  [25810] x32: Incorrect syscall entries with pointer, off_t and size_t
 +  [25896] Incorrect prctl
@@ -55,6 +57,7 @@ index 292fbc595a..a3278be684 100644
 +  [25933] Off by one error in __strncmp_avx2
 +  [25966] Incorrect access of __x86_shared_non_temporal_threshold for x32
 +  [25976] nss_compat: internal_end*ent may clobber errno, hiding ERANGE
++  [26211] printf integer overflow calculating allocation size
 +  [26224] iconv hangs when converting some invalid inputs from several IBM
 +    character sets (CVE-2020-27618)
 +  [26248] Incorrect argument types for INLINE_SETXID_SYSCALL
@@ -72,6 +75,7 @@ index 292fbc595a..a3278be684 100644
 +  [28769] CVE-2021-3999: Off-by-one buffer overflow/underflow in getcwd()
 +  [28896] strncmp-avx2-rtm and wcsncmp-avx2-rtm fallback on non-rtm
 +    variants when avoiding overflow
++  [29530] segfault in printf handling thousands separator
 +
 +Security related changes:
 +
@@ -117,7 +121,7 @@ index 292fbc595a..a3278be684 100644
  Version 2.31
  
  Major new features:
-@@ -141,6 +225,18 @@ Changes to build and runtime requirements:
+@@ -141,6 +229,18 @@ Changes to build and runtime requirements:
    source tree.  ChangeLog files are located in the ChangeLog.old directory as
    ChangeLog.N where the highest N has the latest entries.
  
@@ -4022,6 +4026,941 @@ index 0000000000..29c2a81afd
 +}
 +
 +#include <support/test-driver.c>
+diff --git a/stdio-common/Makefile b/stdio-common/Makefile
+index 95af0c12d7..5e92d6b9ae 100644
+--- a/stdio-common/Makefile
++++ b/stdio-common/Makefile
+@@ -66,6 +66,10 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \
+        tst-scanf-round \
+        tst-renameat2 tst-bz11319 tst-bz11319-fortify2 \
+        scanf14a scanf16a \
++       tst-printf-bz25691 \
++       tst-vfprintf-width-prec-alloc \
++       tst-grouping2 \
++  # tests
+ 
+ 
+ test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble
+@@ -75,10 +79,12 @@ tests-special += $(objpfx)tst-unbputc.out $(objpfx)tst-printf.out \
+                $(objpfx)tst-printf-bz18872-mem.out \
+                $(objpfx)tst-setvbuf1-cmp.out \
+                $(objpfx)tst-vfprintf-width-prec-mem.out \
+-               $(objpfx)tst-printfsz-islongdouble.out
++               $(objpfx)tst-printfsz-islongdouble.out \
++               $(objpfx)tst-printf-bz25691-mem.out
+ generated += tst-printf-bz18872.c tst-printf-bz18872.mtrace \
+            tst-printf-bz18872-mem.out \
+-           tst-vfprintf-width-prec.mtrace tst-vfprintf-width-prec-mem.out
++           tst-vfprintf-width-prec.mtrace tst-vfprintf-width-prec-mem.out \
++           tst-printf-bz25691.mtrace tst-printf-bz25691-mem.out
+ endif
+ 
+ include ../Rules
+@@ -91,6 +97,7 @@ $(objpfx)bug14.out: $(gen-locales)
+ $(objpfx)scanf13.out: $(gen-locales)
+ $(objpfx)test-vfprintf.out: $(gen-locales)
+ $(objpfx)tst-grouping.out: $(gen-locales)
++$(objpfx)tst-grouping2.out: $(gen-locales)
+ $(objpfx)tst-sprintf.out: $(gen-locales)
+ $(objpfx)tst-sscanf.out: $(gen-locales)
+ $(objpfx)tst-swprintf.out: $(gen-locales)
+@@ -100,6 +107,8 @@ endif
+ tst-printf-bz18872-ENV = MALLOC_TRACE=$(objpfx)tst-printf-bz18872.mtrace
+ tst-vfprintf-width-prec-ENV = \
+   MALLOC_TRACE=$(objpfx)tst-vfprintf-width-prec.mtrace
++tst-printf-bz25691-ENV = \
++  MALLOC_TRACE=$(objpfx)tst-printf-bz25691.mtrace
+ 
+ $(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc
+       $(SHELL) $< $(common-objpfx) '$(test-program-prefix)' > $@; \
+diff --git a/stdio-common/bug22.c b/stdio-common/bug22.c
+index b3d48eb8e1..029b549941 100644
+--- a/stdio-common/bug22.c
++++ b/stdio-common/bug22.c
+@@ -57,7 +57,7 @@ do_test (void)
+ 
+   ret = fprintf (fp, "%." SN3 "d", 1);
+   printf ("ret = %d\n", ret);
+-  if (ret != -1 || errno != EOVERFLOW)
++  if (ret != N3)
+         return 1;
+ 
+   /* GCC 9 warns about output of more than INT_MAX characters; this is
+diff --git a/stdio-common/tst-grouping2.c b/stdio-common/tst-grouping2.c
+new file mode 100644
+index 0000000000..3024c942a6
+--- /dev/null
++++ b/stdio-common/tst-grouping2.c
+@@ -0,0 +1,39 @@
++/* Test printf with grouping and large width (bug 29530)
++   Copyright (C) 2022 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <locale.h>
++#include <stdio.h>
++#include <support/check.h>
++#include <support/support.h>
++
++static int
++do_test (void)
++{
++  const int field_width = 1000;
++  char buf[field_width + 1];
++
++  xsetlocale (LC_NUMERIC, "de_DE.UTF-8");
++
++  /* This used to crash in group_number.  */
++  TEST_COMPARE (sprintf (buf, "%'*d", field_width, 1000), field_width);
++  TEST_COMPARE_STRING (buf + field_width - 6, " 1.000");
++
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/stdio-common/tst-printf-bz25691.c b/stdio-common/tst-printf-bz25691.c
+new file mode 100644
+index 0000000000..37b30a3a8a
+--- /dev/null
++++ b/stdio-common/tst-printf-bz25691.c
+@@ -0,0 +1,108 @@
++/* Test for memory leak with large width (BZ#25691).
++   Copyright (C) 2020 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <wchar.h>
++#include <stdint.h>
++#include <locale.h>
++
++#include <mcheck.h>
++#include <support/check.h>
++#include <support/support.h>
++
++static int
++do_test (void)
++{
++  mtrace ();
++
++  /* For 's' conversion specifier with 'l' modifier the array must be
++     converted to multibyte characters up to the precision specific
++     value.  */
++  {
++    /* The input size value is to force a heap allocation on temporary
++       buffer (in the old implementation).  */
++    const size_t winputsize = 64 * 1024 + 1;
++    wchar_t *winput = xmalloc (winputsize * sizeof (wchar_t));
++    wmemset (winput, L'a', winputsize - 1);
++    winput[winputsize - 1] = L'\0';
++
++    char result[9];
++    const char expected[] = "aaaaaaaa";
++    int ret;
++
++    ret = snprintf (result, sizeof (result), "%.65537ls", winput);
++    TEST_COMPARE (ret, winputsize - 1);
++    TEST_COMPARE_BLOB (result, sizeof (result), expected, sizeof (expected));
++
++    ret = snprintf (result, sizeof (result), "%ls", winput);
++    TEST_COMPARE (ret, winputsize - 1);
++    TEST_COMPARE_BLOB (result, sizeof (result), expected, sizeof (expected));
++
++    free (winput);
++  }
++
++  /* For 's' converstion specifier the array is interpreted as a multibyte
++     character sequence and converted to wide characters up to the precision
++     specific value.  */
++  {
++    /* The input size value is to force a heap allocation on temporary
++       buffer (in the old implementation).  */
++    const size_t mbssize = 32 * 1024;
++    char *mbs = xmalloc (mbssize);
++    memset (mbs, 'a', mbssize - 1);
++    mbs[mbssize - 1] = '\0';
++
++    const size_t expectedsize = 32 * 1024;
++    wchar_t *expected = xmalloc (expectedsize * sizeof (wchar_t));
++    wmemset (expected, L'a', expectedsize - 1);
++    expected[expectedsize-1] = L'\0';
++
++    const size_t resultsize = mbssize * sizeof (wchar_t);
++    wchar_t *result = xmalloc (resultsize);
++    int ret;
++
++    ret = swprintf (result, resultsize, L"%.65537s", mbs);
++    TEST_COMPARE (ret, mbssize - 1);
++    TEST_COMPARE_BLOB (result, (ret + 1) * sizeof (wchar_t),
++                     expected, expectedsize * sizeof (wchar_t));
++
++    ret = swprintf (result, resultsize, L"%1$.65537s", mbs);
++    TEST_COMPARE (ret, mbssize - 1);
++    TEST_COMPARE_BLOB (result, (ret + 1) * sizeof (wchar_t),
++                     expected, expectedsize * sizeof (wchar_t));
++
++    /* Same test, but with an invalid multibyte sequence.  */
++    mbs[mbssize - 2] = 0xff;
++
++    ret = swprintf (result, resultsize, L"%.65537s", mbs);
++    TEST_COMPARE (ret, -1);
++
++    ret = swprintf (result, resultsize, L"%1$.65537s", mbs);
++    TEST_COMPARE (ret, -1);
++
++    free (mbs);
++    free (result);
++    free (expected);
++  }
++
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/stdio-common/tst-vfprintf-width-prec-alloc.c b/stdio-common/tst-vfprintf-width-prec-alloc.c
+new file mode 100644
+index 0000000000..0a74b53a33
+--- /dev/null
++++ b/stdio-common/tst-vfprintf-width-prec-alloc.c
+@@ -0,0 +1,41 @@
++/* Test large width or precision does not involve large allocation.
++   Copyright (C) 2020 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <stdio.h>
++#include <sys/resource.h>
++#include <support/check.h>
++
++char test_string[] = "test";
++
++static int
++do_test (void)
++{
++  struct rlimit limit;
++  TEST_VERIFY_EXIT (getrlimit (RLIMIT_AS, &limit) == 0);
++  limit.rlim_cur = 200 * 1024 * 1024;
++  TEST_VERIFY_EXIT (setrlimit (RLIMIT_AS, &limit) == 0);
++  FILE *fp = fopen ("/dev/null", "w");
++  TEST_VERIFY_EXIT (fp != NULL);
++  TEST_COMPARE (fprintf (fp, "%1000000000d", 1), 1000000000);
++  TEST_COMPARE (fprintf (fp, "%.1000000000s", test_string), 4);
++  TEST_COMPARE (fprintf (fp, "%1000000000d %1000000000d", 1, 2), 2000000001);
++  TEST_COMPARE (fprintf (fp, "%2$.*1$s", 0x7fffffff, test_string), 4);
++  return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/stdio-common/vfprintf-internal.c b/stdio-common/vfprintf-internal.c
+index 3be92d4b6e..b1c8f5c43e 100644
+--- a/stdio-common/vfprintf-internal.c
++++ b/stdio-common/vfprintf-internal.c
+@@ -31,6 +31,7 @@
+ #include <locale/localeinfo.h>
+ #include <stdio.h>
+ #include <scratch_buffer.h>
++#include <intprops.h>
+ 
+ /* This code is shared between the standard stdio implementation found
+    in GNU C library and the libio implementation originally found in
+@@ -45,10 +46,6 @@
+ #include <wctype.h>
+ #endif
+ 
+-/* In some cases we need extra space for all the output which is not
+-   counted in the width of the string. We assume 32 characters is
+-   enough.  */
+-#define EXTSIZ                32
+ #define ARGCHECK(S, Format) \
+   do                                                                        \
+     {                                                                       \
+@@ -119,22 +116,38 @@
+   while (0)
+ #endif
+ 
+-#define done_add(val) \
+-  do {                                                                        
      \
+-    unsigned int _val = val;                                                \
+-    assert ((unsigned int) done < (unsigned int) INT_MAX);                  \
+-    if (__glibc_unlikely (INT_MAX - done < _val))                           \
+-      {                                                                       
      \
+-      done = -1;                                                            \
+-       __set_errno (EOVERFLOW);                                             \
+-      goto all_done;                                                        \
+-      }                                                                       
      \
+-    done += _val;                                                           \
+-  } while (0)
++/* Add LENGTH to DONE.  Return the new value of DONE, or -1 on
++   overflow (and set errno accordingly).  */
++static inline int
++done_add_func (size_t length, int done)
++{
++  if (done < 0)
++    return done;
++  int ret;
++  if (INT_ADD_WRAPV (done, length, &ret))
++    {
++      __set_errno (EOVERFLOW);
++      return -1;
++    }
++  return ret;
++}
++
++#define done_add(val)                                                 \
++  do                                                                  \
++    {                                                                 \
++      /* Ensure that VAL has a type similar to int.  */                       
\
++      _Static_assert (sizeof (val) == sizeof (int), "value int size");        
\
++      _Static_assert ((__typeof__ (val)) -1 < 0, "value signed");     \
++      done = done_add_func ((val), done);                             \
++      if (done < 0)                                                   \
++      goto all_done;                                                  \
++    }                                                                 \
++  while (0)
+ 
+ #ifndef COMPILE_WPRINTF
+ # define vfprintf     __vfprintf_internal
+ # define CHAR_T               char
++# define OTHER_CHAR_T   wchar_t
+ # define UCHAR_T      unsigned char
+ # define INT_T                int
+ typedef const char *THOUSANDS_SEP_T;
+@@ -143,25 +156,14 @@ typedef const char *THOUSANDS_SEP_T;
+ # define STR_LEN(Str) strlen (Str)
+ 
+ # define PUT(F, S, N) _IO_sputn ((F), (S), (N))
+-# define PAD(Padchar) \
+-  do {                                                                        
      \
+-    if (width > 0)                                                          \
+-      {                                                                       
      \
+-      ssize_t written = _IO_padn (s, (Padchar), width);                     \
+-      if (__glibc_unlikely (written != width))                              \
+-        {                                                                   \
+-          done = -1;                                                        \
+-          goto all_done;                                                    \
+-        }                                                                   \
+-      done_add (written);                                                   \
+-      }                                                                       
      \
+-  } while (0)
+ # define PUTC(C, F)   _IO_putc_unlocked (C, F)
+# define ORIENT               if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\
+                         return -1
++# define CONVERT_FROM_OTHER_STRING __wcsrtombs
+ #else
+ # define vfprintf     __vfwprintf_internal
+ # define CHAR_T               wchar_t
++# define OTHER_CHAR_T   char
+ /* This is a hack!!!  There should be a type uwchar_t.  */
+ # define UCHAR_T      unsigned int /* uwchar_t */
+ # define INT_T                wint_t
+@@ -173,21 +175,9 @@ typedef wchar_t THOUSANDS_SEP_T;
+ # include <_itowa.h>
+ 
+ # define PUT(F, S, N) _IO_sputn ((F), (S), (N))
+-# define PAD(Padchar) \
+-  do {                                                                        
      \
+-    if (width > 0)                                                          \
+-      {                                                                       
      \
+-      ssize_t written = _IO_wpadn (s, (Padchar), width);                    \
+-      if (__glibc_unlikely (written != width))                              \
+-        {                                                                   \
+-          done = -1;                                                        \
+-          goto all_done;                                                    \
+-        }                                                                   \
+-      done_add (written);                                                   \
+-      }                                                                       
      \
+-  } while (0)
+ # define PUTC(C, F)   _IO_putwc_unlocked (C, F)
+ # define ORIENT               if (_IO_fwide (s, 1) != 1) return -1
++# define CONVERT_FROM_OTHER_STRING __mbsrtowcs
+ 
+ # undef _itoa
+ # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case)
+@@ -196,6 +186,33 @@ typedef wchar_t THOUSANDS_SEP_T;
+ # define EOF WEOF
+ #endif
+ 
++static inline int
++pad_func (FILE *s, CHAR_T padchar, int width, int done)
++{
++  if (width > 0)
++    {
++      ssize_t written;
++#ifndef COMPILE_WPRINTF
++      written = _IO_padn (s, padchar, width);
++#else
++      written = _IO_wpadn (s, padchar, width);
++#endif
++      if (__glibc_unlikely (written != width))
++      return -1;
++      return done_add_func (width, done);
++    }
++  return done;
++}
++
++#define PAD(Padchar)                                                  \
++  do                                                                  \
++    {                                                                 \
++      done = pad_func (s, (Padchar), width, done);                    \
++      if (done < 0)                                                   \
++      goto all_done;                                                  \
++    }                                                                 \
++  while (0)
++
+ #include "_i18n_number.h"
+ 
+ /* Include the shared code for parsing the format string.  */
+@@ -215,24 +232,115 @@ typedef wchar_t THOUSANDS_SEP_T;
+     }                                                                       \
+   while (0)
+ 
+-#define outstring(String, Len)                                                
      \
+-  do                                                                        \
+-    {                                                                       \
+-      assert ((size_t) done <= (size_t) INT_MAX);                           \
+-      if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len))                
      \
+-      {                                                                     \
+-        done = -1;                                                          \
+-        goto all_done;                                                      \
+-      }                                                                     \
+-      if (__glibc_unlikely (INT_MAX - done < (Len)))                        \
+-      {                                                                       
      \
+-      done = -1;                                                            \
+-       __set_errno (EOVERFLOW);                                             \
+-      goto all_done;                                                        \
+-      }                                                                       
      \
+-      done += (Len);                                                        \
+-    }                                                                       \
+-  while (0)
++static inline int
++outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done)
++{
++  assert ((size_t) done <= (size_t) INT_MAX);
++  if ((size_t) PUT (s, string, length) != (size_t) (length))
++    return -1;
++  return done_add_func (length, done);
++}
++
++#define outstring(String, Len)                                                
\
++  do                                                                  \
++    {                                                                 \
++      const void *string_ = (String);                                 \
++      done = outstring_func (s, string_, (Len), done);                        
\
++      if (done < 0)                                                   \
++      goto all_done;                                                  \
++    }                                                                 \
++   while (0)
++
++/* Write the string SRC to S.  If PREC is non-negative, write at most
++   PREC bytes.  If LEFT is true, perform left justification.  */
++static int
++outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec,
++                               int width, bool left, int done)
++{
++  /* Use a small buffer to combine processing of multiple characters.
++     CONVERT_FROM_OTHER_STRING expects the buffer size in (wide)
++     characters, and buf_length counts that.  */
++  enum { buf_length = 256 / sizeof (CHAR_T) };
++  CHAR_T buf[buf_length];
++  _Static_assert (sizeof (buf) > MB_LEN_MAX,
++                "buffer is large enough for a single multi-byte character");
++
++  /* Add the initial padding if needed.  */
++  if (width > 0 && !left)
++    {
++      /* Make a first pass to find the output width, so that we can
++       add the required padding.  */
++      mbstate_t mbstate = { 0 };
++      const OTHER_CHAR_T *src_copy = src;
++      size_t total_written;
++      if (prec < 0)
++      total_written = CONVERT_FROM_OTHER_STRING
++        (NULL, &src_copy, 0, &mbstate);
++      else
++      {
++        /* The source might not be null-terminated.  Enforce the
++           limit manually, based on the output length.  */
++        total_written = 0;
++        size_t limit = prec;
++        while (limit > 0 && src_copy != NULL)
++          {
++            size_t write_limit = buf_length;
++            if (write_limit > limit)
++              write_limit = limit;
++            size_t written = CONVERT_FROM_OTHER_STRING
++              (buf, &src_copy, write_limit, &mbstate);
++            if (written == (size_t) -1)
++              return -1;
++            if (written == 0)
++              break;
++            total_written += written;
++            limit -= written;
++          }
++      }
++
++      /* Output initial padding.  */
++      if (total_written < width)
++      {
++        done = pad_func (s, L_(' '), width - total_written, done);
++        if (done < 0)
++          return done;
++      }
++    }
++
++  /* Convert the input string, piece by piece.  */
++  size_t total_written = 0;
++  {
++    mbstate_t mbstate = { 0 };
++    /* If prec is negative, remaining is not decremented, otherwise,
++      it serves as the write limit.  */
++    size_t remaining = -1;
++    if (prec >= 0)
++      remaining = prec;
++    while (remaining > 0 && src != NULL)
++      {
++      size_t write_limit = buf_length;
++      if (remaining < write_limit)
++        write_limit = remaining;
++      size_t written = CONVERT_FROM_OTHER_STRING
++        (buf, &src, write_limit, &mbstate);
++      if (written == (size_t) -1)
++        return -1;
++      if (written == 0)
++        break;
++      done = outstring_func (s, (const UCHAR_T *) buf, written, done);
++      if (done < 0)
++        return done;
++      total_written += written;
++      if (prec >= 0)
++        remaining -= written;
++      }
++  }
++
++  /* Add final padding.  */
++  if (width > 0 && left && total_written < width)
++    return pad_func (s, L_(' '), width - total_written, done);
++  return done;
++}
+ 
+ /* For handling long_double and longlong we use the same flag.  If
+    `long' and `long long' are effectively the same type define it to
+@@ -1022,7 +1130,6 @@ static const uint8_t jump_table[] =
+     LABEL (form_string):                                                    \
+       {                                                                       
      \
+       size_t len;                                                           \
+-      int string_malloced;                                                  \
+                                                                             \
+       /* The string argument could in fact be `char *' or `wchar_t *'.      \
+          But this should not make a difference here.  */                    \
+@@ -1034,7 +1141,6 @@ static const uint8_t jump_table[] =
+       /* Entry point for printing other strings.  */                        \
+       LABEL (print_string):                                                 \
+                                                                             \
+-      string_malloced = 0;                                                  \
+       if (string == NULL)                                                   \
+         {                                                                   \
+           /* Write "(null)" if there's space.  */                           \
+@@ -1051,41 +1157,12 @@ static const uint8_t jump_table[] =
+         }                                                                   \
+       else if (!is_long && spec != L_('S'))                                 \
+         {                                                                   \
+-          /* This is complicated.  We have to transform the multibyte       \
+-             string into a wide character string.  */                       \
+-          const char *mbs = (const char *) string;                          \
+-          mbstate_t mbstate;                                                \
+-                                                                            \
+-          len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \
+-                                                                            \
+-          /* Allocate dynamically an array which definitely is long         \
+-             enough for the wide character version.  Each byte in the       \
+-             multi-byte string can produce at most one wide character.  */  \
+-          if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t)))         \
+-            {                                                               \
+-              __set_errno (EOVERFLOW);                                      \
+-              done = -1;                                                    \
+-              goto all_done;                                                \
+-            }                                                               \
+-          else if (__libc_use_alloca (len * sizeof (wchar_t)))              \
+-            string = (CHAR_T *) alloca (len * sizeof (wchar_t));            \
+-          else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t)))    \
+-                   == NULL)                                                 \
+-            {                                                               \
+-              done = -1;                                                    \
+-              goto all_done;                                                \
+-            }                                                               \
+-          else                                                              \
+-            string_malloced = 1;                                            \
+-                                                                            \
+-          memset (&mbstate, '\0', sizeof (mbstate_t));                      \
+-          len = __mbsrtowcs (string, &mbs, len, &mbstate);                  \
+-          if (len == (size_t) -1)                                           \
+-            {                                                               \
+-              /* Illegal multibyte character.  */                           \
+-              done = -1;                                                    \
+-              goto all_done;                                                \
+-            }                                                               \
++          done = outstring_converted_wide_string                            \
++            (s, (const char *) string, prec, width, left, done);            \
++          if (done < 0)                                                     \
++            goto all_done;                                                  \
++          /* The padding has already been written.  */                      \
++          break;                                                            \
+         }                                                                   \
+       else                                                                  \
+         {                                                                   \
+@@ -1108,8 +1185,6 @@ static const uint8_t jump_table[] =
+       outstring (string, len);                                              \
+       if (left)                                                             \
+         PAD (L' ');                                                         \
+-      if (__glibc_unlikely (string_malloced))                               \
+-        free (string);                                                      \
+       }                                                                             \
+       break;
+ #else
+@@ -1158,7 +1233,6 @@ static const uint8_t jump_table[] =
+     LABEL (form_string):                                                    \
+       {                                                                             \
+       size_t len;                                                           \
+-      int string_malloced;                                                  \
+                                                                             \
+       /* The string argument could in fact be `char *' or `wchar_t *'.      \
+          But this should not make a difference here.  */                    \
+@@ -1170,7 +1244,6 @@ static const uint8_t jump_table[] =
+       /* Entry point for printing other strings.  */                        \
+       LABEL (print_string):                                                 \
+                                                                             \
+-      string_malloced = 0;                                                  \
+       if (string == NULL)                                                   \
+         {                                                                   \
+           /* Write "(null)" if there's space.  */                           \
+@@ -1196,51 +1269,12 @@ static const uint8_t jump_table[] =
+         }                                                                   \
+       else                                                                  \
+         {                                                                   \
+-          const wchar_t *s2 = (const wchar_t *) string;                     \
+-          mbstate_t mbstate;                                                \
+-                                                                            \
+-          memset (&mbstate, '\0', sizeof (mbstate_t));                      \
+-                                                                            \
+-          if (prec >= 0)                                                    \
+-            {                                                               \
+-              /* The string `s2' might not be NUL terminated.  */           \
+-              if (__libc_use_alloca (prec))                                 \
+-                string = (char *) alloca (prec);                            \
+-              else if ((string = (char *) malloc (prec)) == NULL)           \
+-                {                                                           \
+-                  done = -1;                                                \
+-                  goto all_done;                                            \
+-                }                                                           \
+-              else                                                          \
+-                string_malloced = 1;                                        \
+-              len = __wcsrtombs (string, &s2, prec, &mbstate);              \
+-            }                                                               \
+-          else                                                              \
+-            {                                                               \
+-              len = __wcsrtombs (NULL, &s2, 0, &mbstate);                   \
+-              if (len != (size_t) -1)                                       \
+-                {                                                           \
+-                  assert (__mbsinit (&mbstate));                            \
+-                  s2 = (const wchar_t *) string;                            \
+-                  if (__libc_use_alloca (len + 1))                          \
+-                    string = (char *) alloca (len + 1);                     \
+-                  else if ((string = (char *) malloc (len + 1)) == NULL)    \
+-                    {                                                       \
+-                      done = -1;                                            \
+-                      goto all_done;                                        \
+-                    }                                                       \
+-                  else                                                      \
+-                    string_malloced = 1;                                    \
+-                  (void) __wcsrtombs (string, &s2, len + 1, &mbstate);      \
+-                }                                                           \
+-            }                                                               \
+-                                                                            \
+-          if (len == (size_t) -1)                                           \
+-            {                                                               \
+-              /* Illegal wide-character string.  */                         \
+-              done = -1;                                                    \
+-              goto all_done;                                                \
+-            }                                                               \
++          done = outstring_converted_wide_string                            \
++            (s, (const wchar_t *) string, prec, width, left, done);         \
++          if (done < 0)                                                     \
++            goto all_done;                                                  \
++          /* The padding has already been written.  */                      \
++          break;                                                            \
+         }                                                                   \
+                                                                             \
+       if ((width -= len) < 0)                                               \
+@@ -1254,8 +1288,6 @@ static const uint8_t jump_table[] =
+       outstring (string, len);                                              \
+       if (left)                                                             \
+         PAD (' ');                                                          \
+-      if (__glibc_unlikely (string_malloced))                               \
+-        free (string);                                                      \
+       }                                                                             \
+       break;
+ #endif
+@@ -1307,7 +1339,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
+ 
+   /* Buffer intermediate results.  */
+   CHAR_T work_buffer[WORK_BUFFER_SIZE];
+-  CHAR_T *workstart = NULL;
+   CHAR_T *workend;
+ 
+   /* We have to save the original argument pointer.  */
+@@ -1416,7 +1447,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
+       UCHAR_T pad = L_(' ');/* Padding character.  */
+       CHAR_T spec;
+ 
+-      workstart = NULL;
+       workend = work_buffer + WORK_BUFFER_SIZE;
+ 
+       /* Get current character in format string.  */
+@@ -1508,31 +1538,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
+           pad = L_(' ');
+           left = 1;
+         }
+-
+-      if (__glibc_unlikely (width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
+-        {
+-          __set_errno (EOVERFLOW);
+-          done = -1;
+-          goto all_done;
+-        }
+-
+-      if (width >= WORK_BUFFER_SIZE - EXTSIZ)
+-        {
+-          /* We have to use a special buffer.  */
+-          size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T);
+-          if (__libc_use_alloca (needed))
+-            workend = (CHAR_T *) alloca (needed) + width + EXTSIZ;
+-          else
+-            {
+-              workstart = (CHAR_T *) malloc (needed);
+-              if (workstart == NULL)
+-                {
+-                  done = -1;
+-                  goto all_done;
+-                }
+-              workend = workstart + width + EXTSIZ;
+-            }
+-        }
+       }
+       JUMP (*f, step1_jumps);
+ 
+@@ -1540,31 +1545,13 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
+     LABEL (width):
+       width = read_int (&f);
+ 
+-      if (__glibc_unlikely (width == -1
+-                          || width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
++      if (__glibc_unlikely (width == -1))
+       {
+         __set_errno (EOVERFLOW);
+         done = -1;
+         goto all_done;
+       }
+ 
+-      if (width >= WORK_BUFFER_SIZE - EXTSIZ)
+-      {
+-        /* We have to use a special buffer.  */
+-        size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T);
+-        if (__libc_use_alloca (needed))
+-          workend = (CHAR_T *) alloca (needed) + width + EXTSIZ;
+-        else
+-          {
+-            workstart = (CHAR_T *) malloc (needed);
+-            if (workstart == NULL)
+-              {
+-                done = -1;
+-                goto all_done;
+-              }
+-            workend = workstart + width + EXTSIZ;
+-          }
+-      }
+       if (*f == L_('$'))
+       /* Oh, oh.  The argument comes from a positional parameter.  */
+       goto do_positional;
+@@ -1613,34 +1600,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
+       }
+       else
+       prec = 0;
+-      if (prec > width && prec > WORK_BUFFER_SIZE - EXTSIZ)
+-      {
+-        /* Deallocate any previously allocated buffer because it is
+-           too small.  */
+-        if (__glibc_unlikely (workstart != NULL))
+-          free (workstart);
+-        workstart = NULL;
+-        if (__glibc_unlikely (prec >= INT_MAX / sizeof (CHAR_T) - EXTSIZ))
+-          {
+-            __set_errno (EOVERFLOW);
+-            done = -1;
+-            goto all_done;
+-          }
+-        size_t needed = ((size_t) prec + EXTSIZ) * sizeof (CHAR_T);
+-
+-        if (__libc_use_alloca (needed))
+-          workend = (CHAR_T *) alloca (needed) + prec + EXTSIZ;
+-        else
+-          {
+-            workstart = (CHAR_T *) malloc (needed);
+-            if (workstart == NULL)
+-              {
+-                done = -1;
+-                goto all_done;
+-              }
+-            workend = workstart + prec + EXTSIZ;
+-          }
+-      }
+       JUMP (*f, step2_jumps);
+ 
+       /* Process 'h' modifier.  There might another 'h' following.  */
+@@ -1704,10 +1663,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
+       /* The format is correctly handled.  */
+       ++nspecs_done;
+ 
+-      if (__glibc_unlikely (workstart != NULL))
+-      free (workstart);
+-      workstart = NULL;
+-
+       /* Look for next format specifier.  */
+ #ifdef COMPILE_WPRINTF
+       f = __find_specwc ((end_of_spec = ++f));
+@@ -1725,18 +1680,11 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags)
+ 
+   /* Hand off processing for positional parameters.  */
+ do_positional:
+-  if (__glibc_unlikely (workstart != NULL))
+-    {
+-      free (workstart);
+-      workstart = NULL;
+-    }
+   done = printf_positional (s, format, readonly_format, ap, &ap_save,
+                           done, nspecs_done, lead_str_end, work_buffer,
+                           save_errno, grouping, thousands_sep, mode_flags);
+ 
+  all_done:
+-  if (__glibc_unlikely (workstart != NULL))
+-    free (workstart);
+   /* Unlock the stream.  */
+   _IO_funlockfile (s);
+   _IO_cleanup_region_end (0);
+@@ -1780,8 +1728,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format,
+   /* Just a counter.  */
+   size_t cnt;
+ 
+-  CHAR_T *workstart = NULL;
+-
+   if (grouping == (const char *) -1)
+     {
+ #ifdef COMPILE_WPRINTF
+@@ -1974,7 +1920,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format,
+       char pad = specs[nspecs_done].info.pad;
+       CHAR_T spec = specs[nspecs_done].info.spec;
+ 
+-      workstart = NULL;
+       CHAR_T *workend = work_buffer + WORK_BUFFER_SIZE;
+ 
+       /* Fill in last information.  */
+@@ -2008,27 +1953,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format,
+         prec = specs[nspecs_done].info.prec;
+       }
+ 
+-      /* Maybe the buffer is too small.  */
+-      if (MAX (prec, width) + EXTSIZ > WORK_BUFFER_SIZE)
+-      {
+-        if (__libc_use_alloca ((MAX (prec, width) + EXTSIZ)
+-                               * sizeof (CHAR_T)))
+-          workend = ((CHAR_T *) alloca ((MAX (prec, width) + EXTSIZ)
+-                                        * sizeof (CHAR_T))
+-                     + (MAX (prec, width) + EXTSIZ));
+-        else
+-          {
+-            workstart = (CHAR_T *) malloc ((MAX (prec, width) + EXTSIZ)
+-                                           * sizeof (CHAR_T));
+-            if (workstart == NULL)
+-              {
+-                done = -1;
+-                goto all_done;
+-              }
+-            workend = workstart + (MAX (prec, width) + EXTSIZ);
+-          }
+-      }
+-
+       /* Process format specifiers.  */
+       while (1)
+       {
+@@ -2102,18 +2026,12 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format,
+         break;
+       }
+ 
+-      if (__glibc_unlikely (workstart != NULL))
+-      free (workstart);
+-      workstart = NULL;
+-
+       /* Write the following constant string.  */
+       outstring (specs[nspecs_done].end_of_fmt,
+                specs[nspecs_done].next_fmt
+                - specs[nspecs_done].end_of_fmt);
+     }
+  all_done:
+-  if (__glibc_unlikely (workstart != NULL))
+-    free (workstart);
+   scratch_buffer_free (&argsbuf);
+   scratch_buffer_free (&specsbuf);
+   return done;
+@@ -2236,7 +2154,8 @@ group_number (CHAR_T *front_ptr, CHAR_T *w, CHAR_T *rear_ptr,
+           copy_rest:
+             /* No further grouping to be done.  Copy the rest of the
+                number.  */
+-            memmove (w, s, (front_ptr -s) * sizeof (CHAR_T));
++            w -= s - front_ptr;
++            memmove (w, front_ptr, (s - front_ptr) * sizeof (CHAR_T));
+             break;
+           }
+         else if (*grouping != '\0')
 diff --git a/stdlib/Makefile b/stdlib/Makefile
 index 45214b59e4..4615f6dfe7 100644
 --- a/stdlib/Makefile
@@ -4594,7 +5533,7 @@ index d961ac4493..02806f4ebd 100644
    printf ("%23s", "");
    FOR_EACH_IMPL (impl, 0)
 diff --git a/string/test-strnlen.c b/string/test-strnlen.c
-index 80ac9e8602..a1a6746cc9 100644
+index 80ac9e8602..ca34352b0d 100644
 --- a/string/test-strnlen.c
 +++ b/string/test-strnlen.c
 @@ -27,6 +27,7 @@
@@ -4613,46 +5552,73 @@ index 80ac9e8602..a1a6746cc9 100644
  # define CHAR wchar_t
  # define BIG_CHAR WCHAR_MAX
  # define MIDDLE_CHAR 1121
-@@ -87,6 +89,38 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
+@@ -73,7 +75,7 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
+ {
+   size_t i;
+ 
+-  align &= 63;
++  align &= (getpagesize () / sizeof (CHAR) - 1);
+   if ((align + len) * sizeof (CHAR) >= page_size)
+     return;
+ 
+@@ -87,6 +89,56 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
      do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen));
  }
  
 +static void
 +do_overflow_tests (void)
 +{
-+  size_t i, j, len;
++  size_t i, j, al_idx, repeats, len;
 +  const size_t one = 1;
 +  uintptr_t buf_addr = (uintptr_t) buf1;
++  const size_t alignments[] = { 0, 1, 7, 9, 31, 33, 63, 65, 95, 97, 127, 129 };
 +
-+  for (i = 0; i < 750; ++i)
++  for (al_idx = 0; al_idx < sizeof (alignments) / sizeof (alignments[0]);
++       al_idx++)
 +    {
-+      do_test (0, i, SIZE_MAX - i, BIG_CHAR);
-+      do_test (0, i, i - buf_addr, BIG_CHAR);
-+      do_test (0, i, -buf_addr - i, BIG_CHAR);
-+      do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR);
-+      do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR);
-+
-+      len = 0;
-+      for (j = 8 * sizeof(size_t) - 1; j ; --j)
-+        {
-+          len |= one << j;
-+          do_test (0, i, len - i, BIG_CHAR);
-+          do_test (0, i, len + i, BIG_CHAR);
-+          do_test (0, i, len - buf_addr - i, BIG_CHAR);
-+          do_test (0, i, len - buf_addr + i, BIG_CHAR);
-+
-+          do_test (0, i, ~len - i, BIG_CHAR);
-+          do_test (0, i, ~len + i, BIG_CHAR);
-+          do_test (0, i, ~len - buf_addr - i, BIG_CHAR);
-+          do_test (0, i, ~len - buf_addr + i, BIG_CHAR);
-+        }
++      for (repeats = 0; repeats < 2; ++repeats)
++      {
++        size_t align = repeats ? (getpagesize () - alignments[al_idx])
++                               : alignments[al_idx];
++        align /= sizeof (CHAR);
++        for (i = 0; i < 750; ++i)
++          {
++            do_test (align, i, SIZE_MAX, BIG_CHAR);
++
++            do_test (align, i, SIZE_MAX - i, BIG_CHAR);
++            do_test (align, i, i - buf_addr, BIG_CHAR);
++            do_test (align, i, -buf_addr - i, BIG_CHAR);
++            do_test (align, i, SIZE_MAX - buf_addr - i, BIG_CHAR);
++            do_test (align, i, SIZE_MAX - buf_addr + i, BIG_CHAR);
++
++            len = 0;
++            for (j = 8 * sizeof (size_t) - 1; j; --j)
++              {
++                len |= one << j;
++                do_test (align, i, len, BIG_CHAR);
++                do_test (align, i, len - i, BIG_CHAR);
++                do_test (align, i, len + i, BIG_CHAR);
++                do_test (align, i, len - buf_addr - i, BIG_CHAR);
++                do_test (align, i, len - buf_addr + i, BIG_CHAR);
++
++                do_test (align, i, ~len - i, BIG_CHAR);
++                do_test (align, i, ~len + i, BIG_CHAR);
++                do_test (align, i, ~len - buf_addr - i, BIG_CHAR);
++                do_test (align, i, ~len - buf_addr + i, BIG_CHAR);
++
++                do_test (align, i, -buf_addr, BIG_CHAR);
++                do_test (align, i, j - buf_addr, BIG_CHAR);
++                do_test (align, i, -buf_addr - j, BIG_CHAR);
++              }
++          }
++      }
 +    }
 +}
 +
  static void
  do_random_tests (void)
  {
-@@ -153,7 +187,7 @@ do_page_tests (void)
+@@ -153,7 +205,7 @@ do_page_tests (void)
    size_t last_offset = (page_size / sizeof (CHAR)) - 1;
  
    CHAR *s = (CHAR *) buf2;
@@ -4661,7 +5627,7 @@ index 80ac9e8602..a1a6746cc9 100644
    s[last_offset] = 0;
  
    /* Place short strings ending at page boundary.  */
-@@ -196,6 +230,35 @@ do_page_tests (void)
+@@ -196,6 +248,35 @@ do_page_tests (void)
      }
  }
  
@@ -4697,7 +5663,7 @@ index 80ac9e8602..a1a6746cc9 100644
  int
  test_main (void)
  {
-@@ -242,6 +305,8 @@ test_main (void)
+@@ -242,6 +323,8 @@ test_main (void)
  
    do_random_tests ();
    do_page_tests ();
@@ -11886,7 +12852,7 @@ index 395e432c09..da1446d731 100644
  
  ifeq ($(subdir),debug)
 diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
-index 69f30398ae..74189b6aa5 100644
+index 69f30398ae..925e5b61eb 100644
 --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
 +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
 @@ -21,16 +21,28 @@
@@ -11904,12 +12870,12 @@ index 69f30398ae..74189b6aa5 100644
 -  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
 -      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
 +  if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
++      && CPU_FEATURES_CPU_P (cpu_features, BMI2)
        && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
 -    return OPTIMIZE (avx2);
 +    {
 +      if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
-+        && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)
-+        && CPU_FEATURES_CPU_P (cpu_features, BMI2))
++        && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
 +      return OPTIMIZE (evex);
 +
 +      if (CPU_FEATURES_CPU_P (cpu_features, RTM))
@@ -11922,15 +12888,20 @@ index 69f30398ae..74189b6aa5 100644
    return OPTIMIZE (sse2);
  }
 diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
-index ce7eb1eecf..56b05ee741 100644
+index ce7eb1eecf..e712b148f5 100644
 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
 +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
-@@ -43,6 +43,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -41,8 +41,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   /* Support sysdeps/x86_64/multiarch/memchr.c.  */
+   IFUNC_IMPL (i, name, memchr,
              IFUNC_IMPL_ADD (array, i, memchr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __memchr_avx2)
 +            IFUNC_IMPL_ADD (array, i, memchr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __memchr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, memchr,
@@ -11941,7 +12912,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
  
    /* Support sysdeps/x86_64/multiarch/memcmp.c.  */
-@@ -51,6 +60,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -51,6 +62,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              (HAS_ARCH_FEATURE (AVX2_Usable)
                               && HAS_CPU_FEATURE (MOVBE)),
                              __memcmp_avx2_movbe)
@@ -11958,7 +12929,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1),
                              __memcmp_sse4_1)
              IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
-@@ -64,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -64,10 +85,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __memmove_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
@@ -11971,7 +12942,7 @@ index ce7eb1eecf..56b05ee741 100644
                              __memmove_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
-@@ -75,6 +94,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -75,6 +96,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
                              __memmove_chk_avx_unaligned_erms)
@@ -11992,7 +12963,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, __memmove_chk,
                              HAS_CPU_FEATURE (SSSE3),
                              __memmove_chk_ssse3_back)
-@@ -97,14 +130,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -97,14 +132,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, memmove,
                              HAS_ARCH_FEATURE (AVX_Usable),
                              __memmove_avx_unaligned_erms)
@@ -12023,23 +12994,29 @@ index ce7eb1eecf..56b05ee741 100644
                              __memmove_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
                              __memmove_ssse3_back)
-@@ -121,6 +168,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -119,8 +168,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   /* Support sysdeps/x86_64/multiarch/memrchr.c.  */
+   IFUNC_IMPL (i, name, memrchr,
              IFUNC_IMPL_ADD (array, i, memrchr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __memrchr_avx2)
 +            IFUNC_IMPL_ADD (array, i, memrchr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __memrchr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, memrchr,
 +                            (HAS_ARCH_FEATURE (AVX512VL_Usable)
-+                             && HAS_ARCH_FEATURE (AVX512BW_Usable)),
++                             && HAS_ARCH_FEATURE (AVX512BW_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
 +                            __memrchr_evex)
 +
              IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
  
  #ifdef SHARED
-@@ -139,10 +195,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -139,10 +200,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX2_Usable),
                              __memset_chk_avx2_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memset_chk,
@@ -12070,7 +13047,7 @@ index ce7eb1eecf..56b05ee741 100644
                              __memset_chk_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, __memset_chk,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
-@@ -164,10 +238,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -164,10 +243,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX2_Usable),
                              __memset_avx2_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memset,
@@ -12101,12 +13078,17 @@ index ce7eb1eecf..56b05ee741 100644
                              __memset_avx512_unaligned)
              IFUNC_IMPL_ADD (array, i, memset,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
-@@ -179,20 +271,51 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -177,22 +274,55 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   /* Support sysdeps/x86_64/multiarch/rawmemchr.c.  */
+   IFUNC_IMPL (i, name, rawmemchr,
              IFUNC_IMPL_ADD (array, i, rawmemchr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __rawmemchr_avx2)
 +            IFUNC_IMPL_ADD (array, i, rawmemchr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __rawmemchr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, rawmemchr,
@@ -12155,7 +13137,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
  
    /* Support sysdeps/x86_64/multiarch/stpncpy.c.  */
-@@ -201,6 +324,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -201,6 +331,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              __stpncpy_ssse3)
              IFUNC_IMPL_ADD (array, i, stpncpy, HAS_ARCH_FEATURE (AVX2_Usable),
                              __stpncpy_avx2)
@@ -12170,7 +13152,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, stpncpy, 1,
                              __stpncpy_sse2_unaligned)
              IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
-@@ -211,6 +342,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -211,6 +349,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              __stpcpy_ssse3)
              IFUNC_IMPL_ADD (array, i, stpcpy, HAS_ARCH_FEATURE (AVX2_Usable),
                              __stpcpy_avx2)
@@ -12185,7 +13167,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned)
              IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2))
  
-@@ -245,6 +384,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -245,6 +391,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
    IFUNC_IMPL (i, name, strcat,
              IFUNC_IMPL_ADD (array, i, strcat, HAS_ARCH_FEATURE (AVX2_Usable),
                              __strcat_avx2)
@@ -12200,12 +13182,17 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
                              __strcat_ssse3)
              IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
-@@ -255,6 +402,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -253,23 +407,56 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   /* Support sysdeps/x86_64/multiarch/strchr.c.  */
+   IFUNC_IMPL (i, name, strchr,
              IFUNC_IMPL_ADD (array, i, strchr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __strchr_avx2)
 +            IFUNC_IMPL_ADD (array, i, strchr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __strchr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, strchr,
@@ -12216,12 +13203,16 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
              IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
  
-@@ -263,6 +419,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   /* Support sysdeps/x86_64/multiarch/strchrnul.c.  */
+   IFUNC_IMPL (i, name, strchrnul,
              IFUNC_IMPL_ADD (array, i, strchrnul,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __strchrnul_avx2)
 +            IFUNC_IMPL_ADD (array, i, strchrnul,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __strchrnul_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, strchrnul,
@@ -12232,22 +13223,26 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
  
    /* Support sysdeps/x86_64/multiarch/strrchr.c.  */
-@@ -270,6 +435,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   IFUNC_IMPL (i, name, strrchr,
              IFUNC_IMPL_ADD (array, i, strrchr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __strrchr_avx2)
 +            IFUNC_IMPL_ADD (array, i, strrchr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __strrchr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, strrchr,
 +                            (HAS_ARCH_FEATURE (AVX512VL_Usable)
-+                             && HAS_ARCH_FEATURE (AVX512BW_Usable)),
++                             && HAS_ARCH_FEATURE (AVX512BW_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
 +                            __strrchr_evex)
              IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
  
    /* Support sysdeps/x86_64/multiarch/strcmp.c.  */
-@@ -277,6 +450,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -277,6 +464,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, strcmp,
                              HAS_ARCH_FEATURE (AVX2_Usable),
                              __strcmp_avx2)
@@ -12263,7 +13258,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
                              __strcmp_sse42)
              IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
-@@ -288,6 +470,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -288,6 +484,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
    IFUNC_IMPL (i, name, strcpy,
              IFUNC_IMPL_ADD (array, i, strcpy, HAS_ARCH_FEATURE (AVX2_Usable),
                              __strcpy_avx2)
@@ -12278,7 +13273,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
                              __strcpy_ssse3)
              IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
-@@ -331,6 +521,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -331,6 +535,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
    IFUNC_IMPL (i, name, strncat,
              IFUNC_IMPL_ADD (array, i, strncat, HAS_ARCH_FEATURE (AVX2_Usable),
                              __strncat_avx2)
@@ -12293,7 +13288,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
                              __strncat_ssse3)
              IFUNC_IMPL_ADD (array, i, strncat, 1,
-@@ -341,6 +539,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -341,6 +553,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
    IFUNC_IMPL (i, name, strncpy,
              IFUNC_IMPL_ADD (array, i, strncpy, HAS_ARCH_FEATURE (AVX2_Usable),
                              __strncpy_avx2)
@@ -12308,12 +13303,17 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
                              __strncpy_ssse3)
              IFUNC_IMPL_ADD (array, i, strncpy, 1,
-@@ -370,6 +576,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -368,29 +588,73 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   /* Support sysdeps/x86_64/multiarch/wcschr.c.  */
+   IFUNC_IMPL (i, name, wcschr,
              IFUNC_IMPL_ADD (array, i, wcschr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __wcschr_avx2)
 +            IFUNC_IMPL_ADD (array, i, wcschr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __wcschr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, wcschr,
@@ -12324,12 +13324,15 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
  
    /* Support sysdeps/x86_64/multiarch/wcsrchr.c.  */
-@@ -377,6 +592,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   IFUNC_IMPL (i, name, wcsrchr,
              IFUNC_IMPL_ADD (array, i, wcsrchr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __wcsrchr_avx2)
 +            IFUNC_IMPL_ADD (array, i, wcsrchr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __wcsrchr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, wcsrchr,
@@ -12340,12 +13343,15 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
  
    /* Support sysdeps/x86_64/multiarch/wcscmp.c.  */
-@@ -384,6 +608,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   IFUNC_IMPL (i, name, wcscmp,
              IFUNC_IMPL_ADD (array, i, wcscmp,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __wcscmp_avx2)
 +            IFUNC_IMPL_ADD (array, i, wcscmp,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __wcscmp_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, wcscmp,
@@ -12356,12 +13362,15 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
  
    /* Support sysdeps/x86_64/multiarch/wcsncmp.c.  */
-@@ -391,6 +624,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   IFUNC_IMPL (i, name, wcsncmp,
              IFUNC_IMPL_ADD (array, i, wcsncmp,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __wcsncmp_avx2)
 +            IFUNC_IMPL_ADD (array, i, wcsncmp,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __wcsncmp_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, wcsncmp,
@@ -12372,7 +13381,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
  
    /* Support sysdeps/x86_64/multiarch/wcscpy.c.  */
-@@ -402,15 +644,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -402,15 +666,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
    /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
    IFUNC_IMPL (i, name, wcslen,
              IFUNC_IMPL_ADD (array, i, wcslen,
@@ -12415,12 +13424,17 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, wcsnlen,
                              HAS_CPU_FEATURE (SSE4_1),
                              __wcsnlen_sse4_1)
-@@ -421,6 +688,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -419,8 +708,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+   /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
+   IFUNC_IMPL (i, name, wmemchr,
              IFUNC_IMPL_ADD (array, i, wmemchr,
-                             HAS_ARCH_FEATURE (AVX2_Usable),
+-                            HAS_ARCH_FEATURE (AVX2_Usable),
++                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)),
                              __wmemchr_avx2)
 +            IFUNC_IMPL_ADD (array, i, wmemchr,
 +                            (HAS_ARCH_FEATURE (AVX2_Usable)
++                             && HAS_CPU_FEATURE (BMI2)
 +                             && HAS_CPU_FEATURE (RTM)),
 +                            __wmemchr_avx2_rtm)
 +            IFUNC_IMPL_ADD (array, i, wmemchr,
@@ -12431,7 +13445,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
  
    /* Support sysdeps/x86_64/multiarch/wmemcmp.c.  */
-@@ -429,6 +705,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -429,6 +729,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              (HAS_ARCH_FEATURE (AVX2_Usable)
                               && HAS_CPU_FEATURE (MOVBE)),
                              __wmemcmp_avx2_movbe)
@@ -12448,7 +13462,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1),
                              __wmemcmp_sse4_1)
              IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
-@@ -443,7 +729,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -443,7 +753,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX2_Usable),
                              __wmemset_avx2_unaligned)
              IFUNC_IMPL_ADD (array, i, wmemset,
@@ -12464,7 +13478,7 @@ index ce7eb1eecf..56b05ee741 100644
                              __wmemset_avx512_unaligned))
  
  #ifdef SHARED
-@@ -453,10 +746,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -453,10 +770,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __memcpy_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
@@ -12477,7 +13491,7 @@ index ce7eb1eecf..56b05ee741 100644
                              __memcpy_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
-@@ -464,6 +757,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -464,6 +781,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
                              __memcpy_chk_avx_unaligned_erms)
@@ -12498,7 +13512,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, __memcpy_chk,
                              HAS_CPU_FEATURE (SSSE3),
                              __memcpy_chk_ssse3_back)
-@@ -486,6 +793,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -486,6 +817,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, memcpy,
                              HAS_ARCH_FEATURE (AVX_Usable),
                              __memcpy_avx_unaligned_erms)
@@ -12519,7 +13533,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
                              __memcpy_ssse3_back)
              IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
-@@ -494,10 +815,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -494,10 +839,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __memcpy_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, memcpy,
@@ -12532,7 +13546,7 @@ index ce7eb1eecf..56b05ee741 100644
                              __memcpy_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
              IFUNC_IMPL_ADD (array, i, memcpy, 1,
-@@ -511,10 +832,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -511,10 +856,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __mempcpy_chk_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
@@ -12545,7 +13559,7 @@ index ce7eb1eecf..56b05ee741 100644
                              __mempcpy_chk_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
-@@ -522,6 +843,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -522,6 +867,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
                              HAS_ARCH_FEATURE (AVX_Usable),
                              __mempcpy_chk_avx_unaligned_erms)
@@ -12566,7 +13580,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
                              HAS_CPU_FEATURE (SSSE3),
                              __mempcpy_chk_ssse3_back)
-@@ -542,10 +877,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -542,10 +901,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
                              HAS_ARCH_FEATURE (AVX512F_Usable),
                              __mempcpy_avx512_no_vzeroupper)
              IFUNC_IMPL_ADD (array, i, mempcpy,
@@ -12579,7 +13593,7 @@ index ce7eb1eecf..56b05ee741 100644
                              __mempcpy_avx512_unaligned_erms)
              IFUNC_IMPL_ADD (array, i, mempcpy,
                              HAS_ARCH_FEATURE (AVX_Usable),
-@@ -553,6 +888,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -553,6 +912,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, mempcpy,
                              HAS_ARCH_FEATURE (AVX_Usable),
                              __mempcpy_avx_unaligned_erms)
@@ -12600,7 +13614,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
                              __mempcpy_ssse3_back)
              IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
-@@ -568,6 +917,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -568,6 +941,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, strncmp,
                              HAS_ARCH_FEATURE (AVX2_Usable),
                              __strncmp_avx2)
@@ -12615,7 +13629,7 @@ index ce7eb1eecf..56b05ee741 100644
              IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
                              __strncmp_sse42)
              IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
-@@ -582,6 +939,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+@@ -582,6 +963,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
              IFUNC_IMPL_ADD (array, i, __wmemset_chk,
                              HAS_ARCH_FEATURE (AVX2_Usable),
                              __wmemset_chk_avx2_unaligned)
@@ -19178,10 +20192,10 @@ index 0000000000..75b4b7612c
 +
 +#include "strlen-avx2.S"
 diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
-index 73421ec1b2..45e08e64d6 100644
+index 73421ec1b2..8cfb7391b0 100644
 --- a/sysdeps/x86_64/multiarch/strlen-avx2.S
 +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
-@@ -27,370 +27,531 @@
+@@ -27,370 +27,528 @@
  # ifdef USE_AS_WCSLEN
  #  define VPCMPEQ     vpcmpeqd
  #  define VPMINU      vpminud
@@ -19924,14 +20938,11 @@ index 73421ec1b2..45e08e64d6 100644
 +L(cross_page_less_vec):
 +      tzcntl  %eax, %eax
 +#  ifdef USE_AS_WCSLEN
-+      /* NB: Multiply length by 4 to get byte count.  */
-+      sall    $2, %esi
++      /* NB: Divide by 4 to convert from byte-count to length.  */
++      shrl    $2, %eax
 +#  endif
 +      cmpq    %rax, %rsi
 +      cmovb   %esi, %eax
-+#  ifdef USE_AS_WCSLEN
-+      shrl    $2, %eax
-+#  endif
 +      VZEROUPPER_RETURN
  # endif
 -      VZEROUPPER
diff --git a/debian/patches/series b/debian/patches/series
index c72ebf30..02bd18e7 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -22,8 +22,6 @@ alpha/local-string-functions.diff
 alpha/submitted-fts64.diff
 alpha/submitted-makecontext.diff
 
-amd64/local-require-bmi-in-avx2-ifunc.diff
-
 arm/local-sigaction.diff
 arm/unsubmitted-ldconfig-cache-abi.diff
 arm/local-soname-hack.diff

Reply via email to