On Thu, 3 Apr 2025 at 10:24, Jonathan Wakely <jwak...@redhat.com> wrote: > > On Thu, 3 Apr 2025 at 09:55, Tomasz Kamiński <tkami...@redhat.com> wrote: > > > > This patch corrects handling of UTF-32LE and UTF-32BE in > > __unicode::__literal_encoding_is_unicode<_CharT>, so they are > > recognized as unicode and the function produces the correct result for wchar_t. > > > > Use `__unicode::__field_width` to compute the estimated witdh > > "width" > > > of the charcter for unicode wide encoding. > > "character" > > > > > PR libstdc++-v3/119593 > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/unicode.h > > (__unicode::__literal_encoding_is_unicode<_CharT>): > > Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix. > > * include/std/format (__formatter_str::_S_character_width): > > Define. > > (__formatter_str::_M_format_character): Updated passed char > > length. > > * testsuite/std/format/functions/format.cc: Test for wchar_t. > > --- > > Testing on x86_64-linux. OK for trunk? > > I believe we should backport it, given that all wchar_t uses are > > impacted. 
> > > > libstdc++-v3/include/bits/unicode.h | 2 ++ > > libstdc++-v3/include/std/format | 15 ++++++++++++++- > > .../testsuite/std/format/functions/format.cc | 8 ++++++-- > > 3 files changed, 22 insertions(+), 3 deletions(-) > > > > diff --git a/libstdc++-v3/include/bits/unicode.h > > b/libstdc++-v3/include/bits/unicode.h > > index 24b1ac3d53d..99d972eccff 100644 > > --- a/libstdc++-v3/include/bits/unicode.h > > +++ b/libstdc++-v3/include/bits/unicode.h > > @@ -1039,6 +1039,8 @@ inline namespace __v16_0_0 > > string_view __s(__enc); > > if (__s.ends_with("//")) > > __s.remove_suffix(2); > > + if (__s.ends_with("LE") || __s.ends_with("BE")) > > + __s.remove_suffix(2); > > return __s == "16" || __s == "32"; > > } > > } > > diff --git a/libstdc++-v3/include/std/format > > b/libstdc++-v3/include/std/format > > index c3327e1d384..603facc51de 100644 > > --- a/libstdc++-v3/include/std/format > > +++ b/libstdc++-v3/include/std/format > > @@ -1277,12 +1277,25 @@ namespace __format > > _M_spec); > > } > > > > Please put [[__gnu__::__always_inline__]] on this function, so that it > doesn't add any overhead for narrow chars: > > > + static size_t > > + _S_character_width(_CharT __c) > > + { > > + using __unicode::__literal_encoding_is_unicode; > > + // N.B. single byte cannot encode charcter of width greater than 1 > > + if (sizeof(_CharT) > 1u && __literal_encoding_is_unicode<_CharT>()) > > I think this can be 'if constexpr' > > OK for trunk and gcc-14 with those changes, thanks. > (No backport for gcc-13 because it doesn't have the Unicode-aware > field width support.) 
> > > + return __unicode::__field_width(__c); > > + else > > + return 1u; > > + } > > + > > template<typename _Out> > > typename basic_format_context<_Out, _CharT>::iterator > > _M_format_character(_CharT __c, > > basic_format_context<_Out, _CharT>& __fc) const > > { > > - return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, > > _M_spec); > > + return __format::__write_padded_as_spec({&__c, 1u}, > > + _S_character_width(__c), > > + __fc, _M_spec); > > } > > > > template<typename _Int> > > diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc > > b/libstdc++-v3/testsuite/std/format/functions/format.cc > > index 7fc42017045..d8dbf463413 100644 > > --- a/libstdc++-v3/testsuite/std/format/functions/format.cc > > +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc > > @@ -501,9 +501,14 @@ test_unicode() > > { > > // Similar to sC example in test_std_examples, but not from the standard. > > // Verify that the character "🤡" has estimated field width 2, > > - // rather than estimated field width equal to strlen("🤡"), which would > > be 4. > > + // rather than estimated field width equal to strlen("🤡"), which would > > be 4, > > + // or just width 1 for single character. > > std::string sC = std::format("{:*<3}", "🤡"); > > VERIFY( sC == "🤡*" ); > > + std::wstring wsC = std::format(L"{:*<3}", L"🤡"); > > + VERIFY( wsC == L"🤡*" ); > > + wsC = std::format(L"{:*<3}", L'🤡'); > > + VERIFY( wsC == L"🤡*" ); > > > > // Verify that "£" has estimated field width 1, not strlen("£") == 2. > > std::string sL = std::format("{:*<3}", "£"); > > @@ -517,7 +522,6 @@ test_unicode() > > std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡"); > > VERIFY( sP == "£ *" ); > > sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡"); > > - VERIFY( sP == "£* **" );
I didn't notice at first that this line had been removed. Was that an accident? > > // Verify field width handling for extended grapheme clusters, > > // and that a cluster gets output as a single item, not truncated. > > -- > > 2.48.1 > >