On Mon, Jul 25, 2022 at 11:01 AM Tom Honermann via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > This patch corrects handling of UTF-8 character literals in preprocessing > directives so that they are treated as unsigned types in char8_t enabled > C++ modes (C++17 with -fchar8_t or C++20 without -fno-char8_t). Previously, > UTF-8 character literals were always treated as having the same type as > ordinary character literals (signed or unsigned depending on target or use > of the -fsigned-char or -funsigned-char options). > > Fixes https://gcc.gnu.org/PR106426.
The above mention of the PR # should just be: preprocessor/106426. Then, when this patch gets committed, it will also be recorded in Bugzilla. Thanks, Andrew Pinski > > gcc/c-family/ChangeLog: > * c-opts.cc (c_common_post_options): Assign > cpp_opts->unsigned_utf8char > subject to -fchar8_t, -fsigned-char, and/or -funsigned-char. > > gcc/testsuite/ChangeLog: > * g++.dg/ext/char8_t-char-literal-1.C: Check signedness of u8 > literals. > * g++.dg/ext/char8_t-char-literal-2.C: Check signedness of u8 > literals. > > libcpp/ChangeLog: > * charset.cc (narrow_str_to_charconst): Set signedness of CPP_UTF8CHAR > literals based on unsigned_utf8char. > * include/cpplib.h (cpp_options): Add unsigned_utf8char. > * init.cc (cpp_create_reader): Initialize unsigned_utf8char. > --- > gcc/c-family/c-opts.cc | 1 + > gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C | 6 +++++- > gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C | 4 ++++ > libcpp/charset.cc | 4 ++-- > libcpp/include/cpplib.h | 4 ++-- > libcpp/init.cc | 1 + > 6 files changed, 15 insertions(+), 5 deletions(-) > > diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc > index 108adc5caf8..02ce1e86cdb 100644 > --- a/gcc/c-family/c-opts.cc > +++ b/gcc/c-family/c-opts.cc > @@ -1062,6 +1062,7 @@ c_common_post_options (const char **pfilename) > /* char8_t support is implicitly enabled in C++20 and C2X. */ > if (flag_char8_t == -1) > flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x; > + cpp_opts->unsigned_utf8char = flag_char8_t ? 1 : cpp_opts->unsigned_char; > > if (flag_extern_tls_init) > { > diff --git a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C > b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C > index 8ed85ccfdcd..2994dd38516 100644 > --- a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C > +++ b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-1.C > @@ -1,6 +1,6 @@ > // Test that UTF-8 character literals have type char if -fchar8_t is not > enabled. 
> // { dg-do compile } > -// { dg-options "-std=c++17 -fno-char8_t" } > +// { dg-options "-std=c++17 -fsigned-char -fno-char8_t" } > > template<typename T1, typename T2> > struct is_same > @@ -10,3 +10,7 @@ template<typename T> > { static const bool value = true; }; > > static_assert(is_same<decltype(u8'x'), char>::value, "Error"); > + > +#if u8'\0' - 1 > 0 > +#error "UTF-8 character literals not signed in preprocessor" > +#endif > diff --git a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C > b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C > index 7861736689c..db4fe70046d 100644 > --- a/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C > +++ b/gcc/testsuite/g++.dg/ext/char8_t-char-literal-2.C > @@ -10,3 +10,7 @@ template<typename T> > { static const bool value = true; }; > > static_assert(is_same<decltype(u8'x'), char8_t>::value, "Error"); > + > +#if u8'\0' - 1 < 0 > +#error "UTF-8 character literals not unsigned in preprocessor" > +#endif > diff --git a/libcpp/charset.cc b/libcpp/charset.cc > index ca8b7cf7aa5..12e31632228 100644 > --- a/libcpp/charset.cc > +++ b/libcpp/charset.cc > @@ -1960,8 +1960,8 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string > str, > /* Multichar constants are of type int and therefore signed. */ > if (i > 1) > unsigned_p = 0; > - else if (type == CPP_UTF8CHAR && !CPP_OPTION (pfile, cplusplus)) > - unsigned_p = 1; > + else if (type == CPP_UTF8CHAR) > + unsigned_p = CPP_OPTION (pfile, unsigned_utf8char); > else > unsigned_p = CPP_OPTION (pfile, unsigned_char); > > diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h > index 3eba6f74b57..f9c042db034 100644 > --- a/libcpp/include/cpplib.h > +++ b/libcpp/include/cpplib.h > @@ -581,8 +581,8 @@ struct cpp_options > ints and target wide characters, respectively. */ > size_t precision, char_precision, int_precision, wchar_precision; > > - /* True means chars (wide chars) are unsigned. 
*/ > - bool unsigned_char, unsigned_wchar; > + /* True means chars (wide chars, UTF-8 chars) are unsigned. */ > + bool unsigned_char, unsigned_wchar, unsigned_utf8char; > > /* True if the most significant byte in a word has the lowest > address in memory. */ > diff --git a/libcpp/init.cc b/libcpp/init.cc > index f4ab83d2145..0242da5f55c 100644 > --- a/libcpp/init.cc > +++ b/libcpp/init.cc > @@ -231,6 +231,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table > *table, > CPP_OPTION (pfile, int_precision) = CHAR_BIT * sizeof (int); > CPP_OPTION (pfile, unsigned_char) = 0; > CPP_OPTION (pfile, unsigned_wchar) = 1; > + CPP_OPTION (pfile, unsigned_utf8char) = 1; > CPP_OPTION (pfile, bytes_big_endian) = 1; /* does not matter */ > > /* Default to no charset conversion. */ > -- > 2.32.0 >