https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102958
Bug ID: 102958 Summary: std::u8string suboptimal compared to std::string, triggers warnings Product: gcc Version: 12.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: msebor at gcc dot gnu.org Target Milestone: --- The two functions below are equivalent and would ideally both result in optimally efficient code, but as the dump shows, only the one with std::string does. The other one that uses uchar8_t doesn't. In addition, the latter also triggers a bogus warning. This was discussed in https://gcc.gnu.org/pipermail/gcc-patches/2021-October/582416.html and is the reason for the -Wstringop-overread suppression in libstdc++-v3/testsuite/27_io/filesystem/path/factory/u8path-char8_t.cc. $ cat x.C && gcc -O2 -S -std=c++17 -fchar8_t -fdump-tree-optimized=/dev/stdout x.C int f () { std::string s = "123456789"; std::string t = s; return t.length (); } int g () { std::u8string s = u8"123456789"; std::u8string t = s; return t.length (); } ;; Function f (_Z1fv, funcdef_no=1222, decl_uid=34814, cgraph_uid=307, symbol_order=346) int f () { <bb 2> [local count: 1073741824]: return 9; } ;; Function g (_Z1gv, funcdef_no=1223, decl_uid=34822, cgraph_uid=308, symbol_order=347) int g () { void * D.41167; size_t __i; struct u8string t; struct u8string s; int _7; char8_t * _19; char8_t _23; char8_t * _34; char8_t * _41; char8_t * _51; char8_t * _52; char8_t * _61; char8_t _64; char8_t * _65; long unsigned int _67; long unsigned int _68; long unsigned int _71; long unsigned int _72; char8_t * _73; long unsigned int _75; long unsigned int _76; char8_t * _83; long unsigned int _85; char8_t * _102; void * _129; char8_t * _130; char8_t * _134; long unsigned int _135; <bb 2> [local count: 1073741824]: MEM[(struct basic_string *)&s] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&s] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&s]._M_p = &s.D.29743._M_local_buf; <bb 3> [local count: 8687547547]: # __i_35 = PHI <__i_21(3), 0(2)> __i_21 = __i_35 + 1; _23 = MEM[(const char_type &)"123456789" + __i_21 * 1]; if (_23 != 0) goto <bb 3>; [89.00%] else goto <bb 4>; [11.00%] <bb 4> [local count: 1073741824]: if (__i_21 > 15) goto <bb 5>; [33.00%] else goto <bb 8>; [67.00%] <bb 5> [local count: 354334802]: if (__i_21 > 4611686018427387903) goto <bb 6>; [0.04%] else goto <bb 7>; [99.96%] <bb 6> [local count: 141736]: std::__throw_length_error ("basic_string::_M_create"); <bb 7> [local count: 354193066]: _85 = __i_35 + 2; _41 = operator new (_85); s._M_dataplus._M_p = _41; s.D.29743._M_allocated_capacity = __i_21; __builtin_memcpy (_41, "123456789", __i_21); s._M_string_length = __i_21; _19 = s._M_dataplus._M_p; _130 = _19 + __i_21; MEM[(char_type &)_130] = 0; MEM[(struct basic_string *)&t] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&t] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&t]._M_p = &t.D.29743._M_local_buf; if (_19 == 0B) goto <bb 10>; [30.00%] else goto <bb 11>; [70.00%] <bb 8> [local count: 703255365]: if (__i_21 == 1) goto <bb 9>; [50.19%] else goto <bb 22>; [49.81%] <bb 9> [local count: 352981469]: s._M_string_length = 1; MEM <unsigned short> [(char_type &)&s + 16] = 49; MEM[(struct basic_string *)&t] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&t] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&t]._M_p = &t.D.29743._M_local_buf; _64 = MEM[(const char_type &)&s + 16]; MEM[(char_type &)&t + 16] = _64; goto <bb 14>; [100.00%] <bb 10> [local count: 124582]: std::__throw_logic_error ("basic_string::_M_construct null not valid"); <bb 11> [local count: 342421520]: _135 = __i_35 + 2; _61 = operator new (_135); <bb 12> [local count: 308179374]: t._M_dataplus._M_p = _61; t.D.29743._M_allocated_capacity = __i_21; <bb 13> [local count: 323794238]: # _34 = PHI <&t.D.29743._M_local_buf(22), _61(12)> # _134 = PHI <_102(22), _19(12)> __builtin_memcpy (_34, _134, __i_21); <bb 14> [local count: 1003677033]: t._M_string_length = __i_21; _51 = t._M_dataplus._M_p; _52 = _51 + __i_21; MEM[(char_type &)_52] = 0; _7 = (int) __i_21; if (&t.D.29743._M_local_buf != _51) goto <bb 15>; [53.47%] else goto <bb 16>; [46.53%] <bb 15> [local count: 536666103]: _71 = t.D.29743._M_allocated_capacity; _72 = _71 + 1; operator delete (_51, _72); <bb 16> [local count: 1003677033]: t ={v} {CLOBBER}; _65 = s._M_dataplus._M_p; if (&s.D.29743._M_local_buf != _65) goto <bb 17>; [53.47%] else goto <bb 18>; [46.53%] <bb 17> [local count: 536666103]: _67 = s.D.29743._M_allocated_capacity; _68 = _67 + 1; operator delete (_65, _68); <bb 18> [local count: 1003677033]: s ={v} {CLOBBER}; s ={v} {CLOBBER}; t ={v} {CLOBBER}; return _7; <bb 19> [count: 0]: <L6>: _73 = s._M_dataplus._M_p; if (&s.D.29743._M_local_buf != _73) goto <bb 20>; [0.00%] else goto <bb 21>; [0.00%] <bb 20> [count: 0]: _75 = s.D.29743._M_allocated_capacity; _76 = _75 + 1; operator delete (_73, _76); <bb 21> [count: 0]: s ={v} {CLOBBER}; _129 = __builtin_eh_pointer (7); __builtin_unwind_resume (_129); <bb 22> [local count: 0]: __builtin_memcpy (&s.D.29743._M_local_buf, "123456789", __i_21); s._M_string_length = __i_21; _102 = s._M_dataplus._M_p; _83 = _102 + __i_21; MEM[(char_type &)_83] = 0; MEM[(struct basic_string *)&t] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&t] ={v} {CLOBBER}; MEM[(struct _Alloc_hider *)&t]._M_p = &t.D.29743._M_local_buf; if (_102 == 0B) goto <bb 10>; [30.00%] else goto <bb 13>; [70.00%] } In file included from /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/string:40, from x.C:1: In static member function ‘static std::char_traits<char>::char_type* std::char_traits<char>::copy(std::char_traits<char>::char_type*, const std::char_traits<char>::char_type*, std::size_t)’, inlined from ‘static void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_S_copy(_CharT*, const _CharT*, std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:361:21, inlined from ‘static void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_S_copy(_CharT*, const _CharT*, std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type) [with _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:356:7, inlined from ‘static void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_S_copy_chars(_CharT*, const _CharT*, const _CharT*) [with _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:408:16, inlined from ‘void std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::_M_construct(_InIterator, _InIterator, std::forward_iterator_tag) [with _FwdIterator = const char8_t*; _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.tcc:225:25, inlined from ‘std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::basic_string(const _CharT*, const _Alloc&) [with <template-parameter-2-1> = std::allocator<char8_t>; _CharT = char8_t; _Traits = std::char_traits<char8_t>; _Alloc = std::allocator<char8_t>]’ at /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.h:541:14, inlined from ‘int g()’ at x.C:12:21: /build/gcc-master/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/char_traits.h:355:56: warning: ‘void* __builtin_memcpy(void*, const void*, long unsigned int)’ reading between 16 and 4611686018427387903 bytes from a region of size 10 [-Wstringop-overread] 355 | return static_cast<char_type*>(__builtin_memcpy(__s1, __s2, __n)); | ~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~