When the max argument to std::codecvt<wchar_t, char, mbstate_t>::length
is SIZE_MAX/4+1 or greater the multiplication with sizeof(wchar_t) will
wrap to a small value, and the alloca call will have a buffer that's
smaller than requested. The call to mbsnrtowcs then has a buffer that is
smaller than the value passed as the buffer length. When libstdc++.so is
built with -D_FORTIFY_SOURCE=3 the mismatched buffer and length will get
detected and will abort inside Glibc.

When it doesn't abort, there's no buffer overflow because Glibc's
mbsnrtowcs has the same len * sizeof(wchar_t) calculation to determine
the size of the buffer in bytes, and that will wrap to the same small
number as the alloca argument. So luckily Glibc agrees with the caller
about the real size of the buffer, and won't overflow it.

Even when the max argument isn't large enough to wrap, it can still be
much too large to safely pass to alloca, so we should limit that. We
already have a loop that processes chunks so that we can handle null
characters in the middle of the input. If we limit the alloca buffer to
4kB then we'll just loop each time that buffer is filled.

libstdc++-v3/ChangeLog:

        PR libstdc++/105857
        * config/locale/dragonfly/codecvt_members.cc (do_length): Limit
        size of alloca buffer to 4k.
        * config/locale/gnu/codecvt_members.cc (do_length): Likewise.
        * testsuite/22_locale/codecvt/length/wchar_t/105857.cc: New
        test.
---

Tested x86_64-linux. Pushed to trunk, backports to follow.

 .../locale/dragonfly/codecvt_members.cc       |  9 +++++---
 .../config/locale/gnu/codecvt_members.cc      |  9 +++++---
 .../codecvt/length/wchar_t/105857.cc          | 21 +++++++++++++++++++
 3 files changed, 33 insertions(+), 6 deletions(-)
 create mode 100644 
libstdc++-v3/testsuite/22_locale/codecvt/length/wchar_t/105857.cc

diff --git a/libstdc++-v3/config/locale/dragonfly/codecvt_members.cc 
b/libstdc++-v3/config/locale/dragonfly/codecvt_members.cc
index f84143a4d58..188d8db14a8 100644
--- a/libstdc++-v3/config/locale/dragonfly/codecvt_members.cc
+++ b/libstdc++-v3/config/locale/dragonfly/codecvt_members.cc
@@ -226,12 +226,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
     // in case we advance past it and then continue, in a loop.
-    // NB: mbsnrtowcs is a GNU extension
+    // NB: mbsnrtowcs is in POSIX.1-2008
+
+    const size_t __to_len = 1024; // Size of alloca'd output buffer
 
     // A dummy internal buffer is needed in order for mbsnrtocws to consider
     // its fourth parameter (it wouldn't with NULL as first parameter).
     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
-                                                          * __max));
+                                                          * __to_len));
     while (__from < __end && __max)
       {
        const extern_type* __from_chunk_end;
@@ -244,7 +246,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        const extern_type* __tmp_from = __from;
        size_t __conv = mbsnrtowcs(__to, &__from,
                                   __from_chunk_end - __from,
-                                  __max, &__state);
+                                  __max > __to_len ? __to_len : __max,
+                                  &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
diff --git a/libstdc++-v3/config/locale/gnu/codecvt_members.cc 
b/libstdc++-v3/config/locale/gnu/codecvt_members.cc
index 794f25a5f35..e0c9330fbdd 100644
--- a/libstdc++-v3/config/locale/gnu/codecvt_members.cc
+++ b/libstdc++-v3/config/locale/gnu/codecvt_members.cc
@@ -230,12 +230,14 @@ namespace
 
     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
     // in case we advance past it and then continue, in a loop.
-    // NB: mbsnrtowcs is a GNU extension
+    // NB: mbsnrtowcs is in POSIX.1-2008
+
+    const size_t __to_len = 1024; // Size of alloca'd output buffer
 
     // A dummy internal buffer is needed in order for mbsnrtocws to consider
     // its fourth parameter (it wouldn't with NULL as first parameter).
     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
-                                                          * __max));
+                                                          * __to_len));
     while (__from < __end && __max)
       {
        const extern_type* __from_chunk_end;
@@ -248,7 +250,8 @@ namespace
        const extern_type* __tmp_from = __from;
        size_t __conv = mbsnrtowcs(__to, &__from,
                                   __from_chunk_end - __from,
-                                  __max, &__state);
+                                  __max > __to_len ? __to_len : __max,
+                                  &__state);
        if (__conv == static_cast<size_t>(-1))
          {
            // In case of error, in order to stop at the exact place we
diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/length/wchar_t/105857.cc 
b/libstdc++-v3/testsuite/22_locale/codecvt/length/wchar_t/105857.cc
new file mode 100644
index 00000000000..24b2fbc6089
--- /dev/null
+++ b/libstdc++-v3/testsuite/22_locale/codecvt/length/wchar_t/105857.cc
@@ -0,0 +1,21 @@
+// { dg-do run }
+
+// Bug libstdc++/105857 codecvt::do_length causes unexpected buffer overflow
+
+#include <locale>
+#include <testsuite_hooks.h>
+
+int main()
+{
+  std::string s(1050, 'a');
+  typedef std::codecvt<wchar_t, char, std::mbstate_t> Cvt;
+  const Cvt& cvt = std::use_facet<Cvt>(std::locale());
+  const char* from = s.c_str();
+  const char* from_end = s.c_str() + s.size();
+  std::size_t max = std::size_t(-1) / 4 + 2;
+  std::mbstate_t state = std::mbstate_t();
+  int n = cvt.length(state, from, from_end, max);
+  VERIFY( n == s.size() );
+  n = cvt.length(state, from, from_end, 2);
+  VERIFY( n == 2 );
+}
-- 
2.47.0

Reply via email to