tbaeder updated this revision to Diff 427989.
tbaeder marked an inline comment as done.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D124996/new/
https://reviews.llvm.org/D124996
Files:
clang/docs/ReleaseNotes.rst
clang/lib/Lex/PPExpressions.cpp
clang/test/Lexer/utf8-char-literal.cpp
Index: clang/test/Lexer/utf8-char-literal.cpp
===================================================================
--- clang/test/Lexer/utf8-char-literal.cpp
+++ clang/test/Lexer/utf8-char-literal.cpp
@@ -1,7 +1,10 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c2x -x c -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++1z -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++17 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++17 -fsyntax-only -fchar8_t -DCHAR8_T -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++20 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++20 -fsyntax-only -fno-char8_t -DNO_CHAR8_T -verify %s
int array0[u'ñ' == u'\xf1'? 1 : -1];
int array1['\xF1' != u'\xf1'? 1 : -1];
@@ -13,7 +16,7 @@
char d = u8'\u1234'; // expected-error {{character too large for enclosing character literal type}}
char e = u8'ሴ'; // expected-error {{character too large for enclosing character literal type}}
char f = u8'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
-#elif __STDC_VERSION__ > 202000L
+#elif __STDC_VERSION__ >= 202000L
char a = u8'ñ'; // expected-error {{character too large for enclosing character literal type}}
char b = u8'\x80'; // ok
char c = u8'\u0080'; // expected-error {{universal character name refers to a control character}}
@@ -26,3 +29,40 @@
unsigned char : 1),
"Surprise!");
#endif
+
+
+/// In C++17, the behavior depends on -fchar8_t.
+#if __cplusplus == 201703L
+# if defined(__cpp_char8_t)
+# if u8'\xff' == '\xff' // expected-warning {{right side of operator converted from negative value to unsigned}}
+# error Something's not right.
+# endif
+# else
+# if u8'\xff' != '\xff'
+# error Something's not right.
+# endif
+# endif
+#endif
+
+
+/// In C++20 and up, u8 char literals are unsigned by default,
+/// unless -fno-char8_t is specified.
+#if __cplusplus > 201703L
+# if defined(__cpp_char8_t)
+# if u8'\xff' != 0xff
+# error u8 char literal is not unsigned
+# endif
+# else
+# if u8'\xff' == 0xff
+# error u8 char literal is unsigned
+# endif
+# endif
+#endif
+
+
+/// In C2x, u8 char literals are always unsigned.
+#if __STDC_VERSION__ >= 202000L
+# if u8'\xff' != 0xff
+# error u8 char literal is not unsigned
+# endif
+#endif
Index: clang/lib/Lex/PPExpressions.cpp
===================================================================
--- clang/lib/Lex/PPExpressions.cpp
+++ clang/lib/Lex/PPExpressions.cpp
@@ -408,9 +408,18 @@
// Set the value.
Val = Literal.getValue();
// Set the signedness. UTF-16 and UTF-32 are always unsigned
+ // UTF-8 is unsigned in C; in C++ only with char8_t, else it follows plain char.
if (Literal.isWide())
Val.setIsUnsigned(!TargetInfo::isTypeSigned(TI.getWCharType()));
- else if (!Literal.isUTF16() && !Literal.isUTF32())
+ else if (Literal.isUTF16() || Literal.isUTF32())
+ Val.setIsUnsigned(true);
+ else if (Literal.isUTF8()) {
+ if (PP.getLangOpts().CPlusPlus)
+ Val.setIsUnsigned(
+ PP.getLangOpts().Char8 ? true : !PP.getLangOpts().CharIsSigned);
+ else
+ Val.setIsUnsigned(true);
+ } else
Val.setIsUnsigned(!PP.getLangOpts().CharIsSigned);
if (Result.Val.getBitWidth() > Val.getBitWidth()) {
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -315,6 +315,8 @@
template parameter, to conform to the Itanium C++ ABI and be compatible with
GCC. This breaks binary compatibility with code compiled with earlier versions
of clang; use the ``-fclang-abi-compat=14`` option to get the old mangling.
+- Preprocessor character literals with a ``u8`` prefix are now correctly treated as
+ unsigned character literals. This fixes `Issue 54886 <https://github.com/llvm/llvm-project/issues/54886>`_.
C++20 Feature Support
^^^^^^^^^^^^^^^^^^^^^
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits