cor3ntin updated this revision to Diff 478706.
cor3ntin added a comment.
Avoid duplicating errors in macros.
Because of that, we cannot always recover nicely with loose matching.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D138861/new/
https://reviews.llvm.org/D138861
Files:
clang/docs/ReleaseNotes.rst
clang/lib/Lex/Lexer.cpp
clang/lib/Lex/LiteralSupport.cpp
clang/test/CXX/drs/dr26xx.cpp
clang/test/Lexer/char-escapes-delimited.c
clang/test/Lexer/unicode.c
clang/test/Preprocessor/ucn-pp-identifier.c
clang/www/cxx_dr_status.html
Index: clang/www/cxx_dr_status.html
===================================================================
--- clang/www/cxx_dr_status.html
+++ clang/www/cxx_dr_status.html
@@ -15648,7 +15648,7 @@
<td><a href="https://wg21.link/cwg2640">2640</a></td>
<td>accepted</td>
<td>Allow more characters in an n-char sequence</td>
- <td class="none" align="center">Unknown</td>
+ <td class="unreleased" align="center">Clang 16</td>
</tr>
<tr id="2641">
<td><a href="https://wg21.link/cwg2641">2641</a></td>
Index: clang/test/Preprocessor/ucn-pp-identifier.c
===================================================================
--- clang/test/Preprocessor/ucn-pp-identifier.c
+++ clang/test/Preprocessor/ucn-pp-identifier.c
@@ -121,17 +121,17 @@
#define \u{123456789} // expected-error {{hex escape sequence out of range}} expected-error {{macro name must be an identifier}}
#define \u{ // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
#define \u{fgh} // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
-#define \N{ // expected-warning {{incomplete delimited universal character name; treating as '\' 'N' '{' identifier}} expected-error {{macro name must be an identifier}}
+#define \N{
+// expected-warning@-1 {{incomplete delimited universal character name; treating as '\' 'N' '{' identifier}}
+// expected-error@-2 {{macro name must be an identifier}}
#define \N{} // expected-warning {{empty delimited universal character name; treating as '\' 'N' '{' '}'}} expected-error {{macro name must be an identifier}}
#define \N{NOTATHING} // expected-error {{'NOTATHING' is not a valid Unicode character name}} \
// expected-error {{macro name must be an identifier}}
#define \NN // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} expected-error {{macro name must be an identifier}}
#define \N{GREEK_SMALL-LETTERALPHA} // expected-error {{'GREEK_SMALL-LETTERALPHA' is not a valid Unicode character name}} \
// expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespaces}}
-
-#define CONCAT(A, B) A##B
-int CONCAT(\N{GREEK, CAPITALLETTERALPHA}); // expected-error{{expected}} \
- // expected-warning {{incomplete delimited universal character name}}
+#define \N{🤡} // expected-error {{'🤡' is not a valid Unicode character name}} \
+ // expected-error {{macro name must be an identifier}}
#ifdef TRIGRAPHS
int \N??<GREEK CAPITAL LETTER ALPHA??> = 0; // expected-warning{{extension}} cxx2b-warning {{before C++2b}} \
Index: clang/test/Lexer/unicode.c
===================================================================
--- clang/test/Lexer/unicode.c
+++ clang/test/Lexer/unicode.c
@@ -43,6 +43,7 @@
extern int \U0001E4D0; // 𞓐 NAG MUNDARI LETTER O - Added in Unicode 15
extern int _\N{TANGSA LETTER GA};
extern int _\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a valid Unicode character name}} \
+ // expected-error {{expected ';' after top level declarator}} \
// expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespace}}
Index: clang/test/Lexer/char-escapes-delimited.c
===================================================================
--- clang/test/Lexer/char-escapes-delimited.c
+++ clang/test/Lexer/char-escapes-delimited.c
@@ -96,6 +96,11 @@
unsigned i = u'\N{GREEK CAPITAL LETTER DELTA}'; // ext-warning {{extension}} cxx2b-warning {{C++2b}}
char j = '\NN'; // expected-error {{expected '{' after '\N' escape sequence}} expected-warning {{multi-character character constant}}
unsigned k = u'\N{LOTUS'; // expected-error {{incomplete universal character name}}
+
+ const char* emoji = "\N{🤡}"; // expected-error {{'🤡' is not a valid Unicode character name}} \
+ // expected-note 5{{did you mean}}
+ const char* nested = "\N{\N{SPARKLE}}"; // expected-error {{'\N{SPARKLE' is not a valid Unicode character name}} \
+ // expected-note 5{{did you mean}}
}
void separators(void) {
Index: clang/test/CXX/drs/dr26xx.cpp
===================================================================
--- clang/test/CXX/drs/dr26xx.cpp
+++ clang/test/CXX/drs/dr26xx.cpp
@@ -28,3 +28,18 @@
}
}
+
+namespace dr2640 { // dr2640: 16
+
+int \N{Λ} = 0; //expected-error {{'Λ' is not a valid Unicode character name}} \
+ //expected-error {{expected unqualified-id}}
+const char* emoji = "\N{🤡}"; // expected-error {{'🤡' is not a valid Unicode character name}} \
+ // expected-note 5{{did you mean}}
+
+#define z(x) 0
+#define a z(
+int x = a\N{abc}); // expected-error {{'abc' is not a valid Unicode character name}}
+int y = a\N{LOTUS}); // expected-error {{character <U+1FAB7> not allowed in an identifier}} \
+ // expected-error {{use of undeclared identifier 'a🪷'}} \
+ // expected-error {{extraneous ')' before ';'}}
+}
Index: clang/lib/Lex/LiteralSupport.cpp
===================================================================
--- clang/lib/Lex/LiteralSupport.cpp
+++ clang/lib/Lex/LiteralSupport.cpp
@@ -548,11 +548,10 @@
return false;
}
ThisTokBuf++;
- const char *ClosingBrace =
- std::find_if_not(ThisTokBuf, ThisTokEnd, [](char C) {
- return llvm::isAlnum(C) || llvm::isSpace(C) || C == '_' || C == '-';
- });
- bool Incomplete = ClosingBrace == ThisTokEnd || *ClosingBrace != '}';
+ const char *ClosingBrace = std::find_if(ThisTokBuf, ThisTokEnd, [](char C) {
+ return C == '}' || isVerticalWhitespace(C);
+ });
+ bool Incomplete = ClosingBrace == ThisTokEnd;
bool Empty = ClosingBrace == ThisTokBuf;
if (Incomplete || Empty) {
if (Diags) {
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -3309,6 +3309,7 @@
llvm::Optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr,
Token *Result) {
unsigned CharSize;
+
bool Diagnose = Result && !isLexingRawMode();
char C = getCharAndSize(StartPtr, CharSize);
@@ -3335,7 +3336,7 @@
break;
}
- if (!isAlphanumeric(C) && C != '_' && C != '-' && C != ' ')
+ if (isVerticalWhitespace(C))
break;
Buffer.push_back(C);
}
@@ -3353,10 +3354,10 @@
llvm::sys::unicode::nameToCodepointStrict(Name);
llvm::Optional<llvm::sys::unicode::LooseMatchingResult> LooseMatch;
if (!Res) {
- if (!isLexingRawMode()) {
+ LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
+ if (Diagnose) {
Diag(StartPtr, diag::err_invalid_ucn_name)
<< StringRef(Buffer.data(), Buffer.size());
- LooseMatch = llvm::sys::unicode::nameToCodepointLooseMatching(Name);
if (LooseMatch) {
Diag(StartName, diag::note_invalid_ucn_name_loose_matching)
<< FixItHint::CreateReplacement(
@@ -3364,35 +3365,29 @@
LooseMatch->Name);
}
}
- // When finding a match using Unicode loose matching rules
- // recover after having emitted a diagnostic.
- if (!LooseMatch)
- return llvm::None;
// We do not offer misspelled character names suggestions here
// as the set of what would be a valid suggestion depends on context,
// and we should not make invalid suggestions.
}
- if (Diagnose && PP && !LooseMatch)
+ if (Diagnose && Res)
Diag(BufferPtr, PP->getLangOpts().CPlusPlus2b
? diag::warn_cxx2b_delimited_escape_sequence
: diag::ext_delimited_escape_sequence)
<< /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0);
- if (LooseMatch)
+ if (LooseMatch && Diagnose)
Res = LooseMatch->CodePoint;
if (Result) {
Result->setFlag(Token::HasUCN);
- if (CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 4))
- StartPtr = CurPtr;
- else
- while (StartPtr != CurPtr)
- (void)getAndAdvanceChar(StartPtr, *Result);
- } else {
- StartPtr = CurPtr;
}
- return *Res;
+ if (!Result || CurPtr - StartPtr == (ptrdiff_t)(Buffer.size() + 4))
+ StartPtr = CurPtr;
+ else
+ while (StartPtr != CurPtr)
+ (void)getAndAdvanceChar(StartPtr, *Result);
+ return Res ? llvm::Optional<uint32_t>(*Res) : llvm::None;
}
uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -664,6 +664,7 @@
- Implemented "char8_t Compatibility and Portability Fix" (`P2513R3 <https://wg21.link/P2513R3>`_).
This change was applied to C++20 as a Defect Report.
- Implemented "Permitting static constexpr variables in constexpr functions" (`P2647R1 <https://wg21.link/P2647R1>_`).
+- Implemented `CWG2640 Allow more characters in an n-char sequence <https://wg21.link/CWG2640>`_.
CUDA/HIP Language Changes in Clang
----------------------------------
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits