https://github.com/AaronBallman updated https://github.com/llvm/llvm-project/pull/137234
>From 56a3f3cd282e9bd5ef9014e4125380e0d9685121 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Thu, 24 Apr 2025 14:17:42 -0400 Subject: [PATCH 01/16] [C] Diagnose use of C++ keywords in C This adds a new diagnostic group, -Widentifier-is-c++-keyword, which is off by default and grouped under -Wc++-compat. The diagnostic catches use of C++ keywords in C code. This change additionally fixes an issue with -Wreserved-identifier not diagnosing use of reserved identifiers in function parameter lists. Partially fixes #21898 --- clang/docs/ReleaseNotes.rst | 3 + clang/include/clang/Basic/DiagnosticGroups.td | 3 +- .../clang/Basic/DiagnosticSemaKinds.td | 3 + clang/include/clang/Basic/IdentifierTable.h | 11 +- clang/lib/Basic/IdentifierTable.cpp | 27 +++ clang/lib/Sema/SemaDecl.cpp | 23 ++ clang/test/Sema/c++-keyword-in-c.c | 213 ++++++++++++++++++ 7 files changed, 279 insertions(+), 4 deletions(-) create mode 100644 clang/test/Sema/c++-keyword-in-c.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index d1f24fb23d44d..57b604335fcdd 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -143,6 +143,9 @@ C Language Changes - Added ``-Wimplicit-void-ptr-cast``, grouped under ``-Wc++-compat``, which diagnoses implicit conversion from ``void *`` to another pointer type as being incompatible with C++. (#GH17792) +- Added ``-Widentifier-is-c++-keyword``, grouped under ``-Wc++-compat``, which + diagnoses when a C++ keyword is used as an identifier in C. Partially + addresses #GH21898. C2y Feature Support ^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 6441b8049ed8d..84e590a857398 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -155,8 +155,9 @@ def C99Compat : DiagGroup<"c99-compat">; def C23Compat : DiagGroup<"c23-compat">; def : DiagGroup<"c2x-compat", [C23Compat]>; +def CppKeywordInC : DiagGroup<"identifier-is-c++-keyword">; def ImplicitVoidPtrCast : DiagGroup<"implicit-void-ptr-cast">; -def CXXCompat: DiagGroup<"c++-compat", [ImplicitVoidPtrCast]>; +def CXXCompat: DiagGroup<"c++-compat", [ImplicitVoidPtrCast, CppKeywordInC]>; def ExternCCompat : DiagGroup<"extern-c-compat">; def KeywordCompat : DiagGroup<"keyword-compat">; def GNUCaseRange : DiagGroup<"gnu-case-range">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 8ff170520aafe..50a960313349a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -496,6 +496,9 @@ def warn_unused_lambda_capture: Warning<"lambda capture %0 is not " "%select{used|required to be captured for this use}1">, InGroup<UnusedLambdaCapture>, DefaultIgnore; +def warn_identifier_is_cpp_keyword : Warning< + "identifier %0 conflicts with a C++ keyword">, + InGroup<CppKeywordInC>, DefaultIgnore; def warn_reserved_extern_symbol: Warning< "identifier %0 is reserved because %select{" "<ERROR>|" // ReservedIdentifierStatus::NotReserved diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index 1275b056227b5..05c989c1b07d9 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -444,13 +444,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { } bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } - /// Return true if this token is a keyword in the specified language. + /// Return true if this identifier uses a keyword token and is a keyword in + /// the specified language. bool isKeyword(const LangOptions &LangOpts) const; - /// Return true if this token is a C++ keyword in the specified - /// language. + /// Return true if this identifier uses a keyword token and is a C++ keyword + /// in the specified language. bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; + /// Returns true if the name of this identifier matches a keyword given the + /// specified language options. + bool isNameKeyword(const LangOptions &LangOpts) const; + /// Get and set FETokenInfo. The language front-end is allowed to associate /// arbitrary metadata with this token. void *getFETokenInfo() const { return FETokenInfo; } diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 16151c94464f9..412f0af40861a 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -343,6 +343,14 @@ static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts, } } +static KeywordStatus getNameKwStatus(const LangOptions &LangOpts, + StringRef Name) { + return llvm::StringSwitch<KeywordStatus>(Name) +#define KEYWORD(NAME, FLAGS) .Case(#NAME, getKeywordStatus(LangOpts, FLAGS)) +#include "clang/Basic/TokenKinds.def" + .Default(KS_Disabled); +} + /// Returns true if the identifier represents a keyword in the /// specified language. bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const { @@ -355,6 +363,25 @@ bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const { } } +bool IdentifierInfo::isNameKeyword(const LangOptions &LangOpts) const { + // This differs from IdentifierInfo::isCPlusPlusKeyword(). That function + // tests if the identifier is a keyword token in C++ mode and then isn't a + // keyword token in C modes. In our case, if it was a keyword, we wouldn't + // have gotten the identifier for it anyway, so that function will always + // return false for us. Instead, check against the token definitions directly. + // + // FIXME: It would be nice to handle things like char8_t and wchar_t here as + // well, but those require looking at the currently selected language options + // which would make this interface confusing with isCPlusPlusKeyword. + switch (getNameKwStatus(LangOpts, getName())) { + case KS_Enabled: + case KS_Extension: + return true; + default: + return false; + } +} + /// Returns true if the identifier represents a C++ keyword in the /// specified language. bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index d28a2107d58a9..9bb96dbdc1670 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -61,6 +61,7 @@ #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/TargetParser/Triple.h" #include <algorithm> @@ -6107,6 +6108,23 @@ static bool isFromSystemHeader(SourceManager &SM, const Decl *D) { SM.isInSystemMacro(D->getLocation()); } +static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo* II) { + if (!II) + return false; + + // Clear all the C language options, set all the C++ language options, but + // otherwise leave all the defaults alone. This isn't ideal because some + // feature flags are language-specific, but this is a reasonable + // approximation. + LangOptions LO = S.getLangOpts(); + LO.CPlusPlus = LO.CPlusPlus11 = LO.CPlusPlus14 = LO.CPlusPlus17 = + LO.CPlusPlus20 = LO.CPlusPlus23 = LO.CPlusPlus26 = 1; + LO.C99 = LO.C11 = LO.C17 = LO.C23 = LO.C2y = 0; + LO.Char8 = 1; // Presume this is always a keyword in C++. + LO.WChar = 1; // Presume this is always a keyword in C++. + return II->isNameKeyword(LO); +} + void Sema::warnOnReservedIdentifier(const NamedDecl *D) { // Avoid warning twice on the same identifier, and don't warn on redeclaration // of system decl. @@ -6118,6 +6136,10 @@ void Sema::warnOnReservedIdentifier(const NamedDecl *D) { Diag(D->getLocation(), diag::warn_reserved_extern_symbol) << D << static_cast<int>(Status); } + // Diagnose use of C++ keywords in C as being incompatible with C++. + if (!getLangOpts().CPlusPlus && + isKeywordInCPlusPlus(*this, D->getIdentifier())) + Diag(D->getLocation(), diag::warn_identifier_is_cpp_keyword) << D; } Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { @@ -15311,6 +15333,7 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D, if (D.isInvalidType()) New->setInvalidDecl(); + warnOnReservedIdentifier(New); CheckExplicitObjectParameter(*this, New, ExplicitThisLoc); assert(S->isFunctionPrototypeScope()); diff --git a/clang/test/Sema/c++-keyword-in-c.c b/clang/test/Sema/c++-keyword-in-c.c new file mode 100644 index 0000000000000..364f115ddcb44 --- /dev/null +++ b/clang/test/Sema/c++-keyword-in-c.c @@ -0,0 +1,213 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Widentifier-is-c++-keyword %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wc++-compat %s +// RUN: %clang_cc1 -fsyntax-only -verify=good %s +// RUN: %clang_cc1 -fsyntax-only -verify=cxx -x c++ -std=c++2c %s +// good-no-diagnostics + +// 'try', 'this' and 'throw' are not tested as identifiers, but are instead +// tested as other constructs (otherwise there would be redefinition errors in +// C). +int catch; // expected-warning {{identifier 'catch' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int class; // expected-warning {{identifier 'class' conflicts with a C++ keyword}} \ + cxx-error {{declaration of anonymous class must be a definition}} \ + cxx-warning {{declaration does not declare anything}} +int const_cast; // expected-warning {{identifier 'const_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int delete; // expected-warning {{identifier 'delete' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int dynamic_cast; // expected-warning {{identifier 'dynamic_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int explicit; // expected-warning {{identifier 'explicit' conflicts with a C++ keyword}} \ + cxx-error {{'explicit' can only appear on non-static member functions}} \ + cxx-warning {{declaration does not declare anything}} +int export; // expected-warning {{identifier 'export' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int friend; // expected-warning {{identifier 'friend' conflicts with a C++ keyword}} \ + cxx-error {{'friend' used outside of class}} \ + cxx-warning {{declaration does not declare anything}} +int mutable; // expected-warning {{identifier 'mutable' conflicts with a C++ keyword}} \ + cxx-warning {{declaration does not declare anything}} +int namespace; // expected-warning {{identifier 'namespace' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int new; // expected-warning {{identifier 'new' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int operator; // expected-warning {{identifier 'operator' conflicts with a C++ keyword}} \ + cxx-error {{expected a type}} +int private; // expected-warning {{identifier 'private' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int protected; // expected-warning {{identifier 'protected' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int public; // expected-warning {{identifier 'public' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int reinterpret_cast; // expected-warning {{identifier 'reinterpret_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int static_cast; // expected-warning {{identifier 'static_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int template; // expected-warning {{identifier 'template' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int typename; // expected-warning {{identifier 'typename' conflicts with a C++ keyword}} \ + cxx-error {{expected a qualified name after 'typename'}} +int typeid; // expected-warning {{identifier 'typeid' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int using; // expected-warning {{identifier 'using' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int virtual; // expected-warning {{identifier 'virtual' conflicts with a C++ keyword}} \ + cxx-error {{'virtual' can only appear on non-static member functions}} \ + cxx-warning {{declaration does not declare anything}} +int wchar_t; // expected-warning {{identifier 'wchar_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} +int char8_t; // expected-warning {{identifier 'char8_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} +int char16_t; // expected-warning {{identifier 'char16_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} +int char32_t; // expected-warning {{identifier 'char32_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} +int noexcept; // expected-warning {{identifier 'noexcept' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int co_await; // expected-warning {{identifier 'co_await' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int co_return; // expected-warning {{identifier 'co_return' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int co_yield; // expected-warning {{identifier 'co_yield' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int consteval; // expected-warning {{identifier 'consteval' conflicts with a C++ keyword}} \ + cxx-error {{consteval can only be used in function declarations}} +int constinit; // expected-warning {{identifier 'constinit' conflicts with a C++ keyword}} \ + cxx-error {{constinit can only be used in variable declarations}} +int concept; // expected-warning {{identifier 'concept' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int requires; // expected-warning {{identifier 'requires' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} + +// Now try the same thing, but as struct members. +struct S { + int catch; // expected-warning {{identifier 'catch' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int class; // expected-warning {{identifier 'class' conflicts with a C++ keyword}} \ + cxx-error {{declaration of anonymous class must be a definition}} \ + cxx-warning {{declaration does not declare anything}} + int const_cast; // expected-warning {{identifier 'const_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int delete; // expected-warning {{identifier 'delete' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int dynamic_cast; // expected-warning {{identifier 'dynamic_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int explicit; // expected-warning {{identifier 'explicit' conflicts with a C++ keyword}} \ + cxx-error {{'explicit' can only appear on non-static member functions}} \ + cxx-warning {{declaration does not declare anything}} + int export; // expected-warning {{identifier 'export' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int friend; // expected-warning {{identifier 'friend' conflicts with a C++ keyword}} \ + cxx-error {{'friend' must appear first in a non-function declaration}} + int mutable; // expected-warning {{identifier 'mutable' conflicts with a C++ keyword}} \ + cxx-warning {{declaration does not declare anything}} + int namespace; // expected-warning {{identifier 'namespace' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int new; // expected-warning {{identifier 'new' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int operator; // expected-warning {{identifier 'operator' conflicts with a C++ keyword}} \ + cxx-error {{expected a type}} + int private; // expected-warning {{identifier 'private' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int protected; // expected-warning {{identifier 'protected' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int public; // expected-warning {{identifier 'public' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int reinterpret_cast; // expected-warning {{identifier 'reinterpret_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int static_cast; // expected-warning {{identifier 'static_cast' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int template; // expected-warning {{identifier 'template' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int this; // expected-warning {{identifier 'this' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int throw; // expected-warning {{identifier 'throw' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int try; // expected-warning {{identifier 'try' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int typename; // expected-warning {{identifier 'typename' conflicts with a C++ keyword}} \ + cxx-error {{expected a qualified name after 'typename'}} + int typeid; // expected-warning {{identifier 'typeid' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int using; // expected-warning {{identifier 'using' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int virtual; // expected-warning {{identifier 'virtual' conflicts with a C++ keyword}} \ + cxx-error {{'virtual' can only appear on non-static member functions}} \ + cxx-warning {{declaration does not declare anything}} + int wchar_t; // expected-warning {{identifier 'wchar_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} + int char8_t; // expected-warning {{identifier 'char8_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} + int char16_t; // expected-warning {{identifier 'char16_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} + int char32_t; // expected-warning {{identifier 'char32_t' conflicts with a C++ keyword}} \ + cxx-error {{cannot combine with previous 'int' declaration specifier}} \ + cxx-warning {{declaration does not declare anything}} + int noexcept; // expected-warning {{identifier 'noexcept' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int co_await; // expected-warning {{identifier 'co_await' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int co_return; // expected-warning {{identifier 'co_return' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int co_yield; // expected-warning {{identifier 'co_yield' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} + int consteval; // expected-warning {{identifier 'consteval' conflicts with a C++ keyword}} \ + cxx-error {{consteval can only be used in function declarations}} + int constinit; // expected-warning {{identifier 'constinit' conflicts with a C++ keyword}} \ + cxx-error {{constinit can only be used in variable declarations}} + int concept; // expected-warning {{identifier 'concept' conflicts with a C++ keyword}} \ + cxx-error {{concept declarations may only appear in global or namespace scope}} + int requires; // expected-warning {{identifier 'requires' conflicts with a C++ keyword}} \ + cxx-error {{expected member name or ';' after declaration specifiers}} \ + cxx-error {{trailing requires clause can only be used when declaring a function}} \ + cxx-error {{expected expression}} +}; + +// Smoke test that we catch a keyword used as an enumerator. If we diagnose +// one, we'll diagnose them all. +enum E { + throw, // expected-warning {{identifier 'throw' conflicts with a C++ keyword}} \ + cxx-error {{expected identifier}} +}; + +// Smoke test that we catch a keyword used as a tag name. +struct try { // expected-warning {{identifier 'try' conflicts with a C++ keyword}} \ + cxx-error {{declaration of anonymous struct must be a definition}} \ + cxx-warning {{declaration does not declare anything}} + int x; +}; + +// Smoke test that we catch keyword use in a function name. +void this(void); // expected-warning {{identifier 'this' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} + +// Smoke test that we catch keyword use in function parameters too. +void func(int private); // expected-warning {{identifier 'private' conflicts with a C++ keyword}} \ + cxx-error {{invalid parameter name: 'private' is a keyword}} + +// These are conditionally a keyword in C++, so they're intentionally not being +// diagnosed as a keyword. +int module; +int import; + +// We do not diagnose use of C++ keywords when used as a macro name because +// that does not conflict with C++ (the macros will be replaced before the +// keyword is seen by the parser). +#define this 12 + +// FIXME: These tests are disabled for C++ because it causes a crash. +// See GH114815. +#ifndef __cplusplus +int decltype; // expected-warning {{identifier 'decltype' conflicts with a C++ keyword}} +struct T { + int decltype; // expected-warning {{identifier 'decltype' conflicts with a C++ keyword}} +}; +#endif // __cplusplus >From 3b0732e353d52808d4405ca306b87b85e6dbcd13 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Thu, 24 Apr 2025 14:49:40 -0400 Subject: [PATCH 02/16] Fix the parameter handling --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaDecl.cpp | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 57b604335fcdd..453bdc9870bdb 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -419,6 +419,8 @@ Improvements to Clang's diagnostics - ``-Winitializer-overrides`` and ``-Wreorder-init-list`` are now grouped under the ``-Wc99-designator`` diagnostic group, as they also are about the behavior of the C99 feature as it was introduced into C++20. Fixes #GH47037 +- ``-Wreserved-identifier`` now fires on reserved parameter names in a function + declaration which is not a definition. Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 9bb96dbdc1670..74432ea34bd57 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -10438,6 +10438,15 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, // Finally, we know we have the right number of parameters, install them. NewFD->setParams(Params); + // If this declarator is a declaration and not a definition, its parameters + // will not be pushed onto a scope chain. That means we will not issue any + // reserved identifier warnings for the declaration, but we will for the + // definition. Handle those here. + if (!Params.empty() && !D.isFunctionDefinition()) { + for (const ParmVarDecl *PVD : Params) + warnOnReservedIdentifier(PVD); + } + if (D.getDeclSpec().isNoreturnSpecified()) NewFD->addAttr( C11NoReturnAttr::Create(Context, D.getDeclSpec().getNoreturnSpecLoc())); @@ -15333,7 +15342,6 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D, if (D.isInvalidType()) New->setInvalidDecl(); - warnOnReservedIdentifier(New); CheckExplicitObjectParameter(*this, New, ExplicitThisLoc); assert(S->isFunctionPrototypeScope()); >From 55350f090fe3af859d0ab8caad4d308b3af56f26 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Thu, 24 Apr 2025 14:56:49 -0400 Subject: [PATCH 03/16] Update comment; NFC --- clang/lib/Basic/IdentifierTable.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 412f0af40861a..a8b64364a1745 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -368,11 +368,7 @@ bool IdentifierInfo::isNameKeyword(const LangOptions &LangOpts) const { // tests if the identifier is a keyword token in C++ mode and then isn't a // keyword token in C modes. In our case, if it was a keyword, we wouldn't // have gotten the identifier for it anyway, so that function will always - // return false for us. Instead, check against the token definitions directly. - // - // FIXME: It would be nice to handle things like char8_t and wchar_t here as - // well, but those require looking at the currently selected language options - // which would make this interface confusing with isCPlusPlusKeyword. + // return false for us. Instead, check against the identifier name directly. switch (getNameKwStatus(LangOpts, getName())) { case KS_Enabled: case KS_Extension: >From 52fe07b4573de591b3c2aca3d6fc5b3ed7d2e6df Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Thu, 24 Apr 2025 15:00:57 -0400 Subject: [PATCH 04/16] Fix formatting; NFC --- clang/lib/Sema/SemaDecl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 74432ea34bd57..f02050d9a39dd 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6108,7 +6108,7 @@ static bool isFromSystemHeader(SourceManager &SM, const Decl *D) { SM.isInSystemMacro(D->getLocation()); } -static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo* II) { +static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo *II) { if (!II) return false; >From fb976c20a911f033eb3b0e18c19a2efeb1c046a8 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Fri, 25 Apr 2025 07:39:52 -0400 Subject: [PATCH 05/16] Only perform the check if the diagnostic is enabled Checking to see how much performance this claws back --- clang/lib/Sema/SemaDecl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f02050d9a39dd..289a2dd3bae6e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6138,6 +6138,8 @@ void Sema::warnOnReservedIdentifier(const NamedDecl *D) { } // Diagnose use of C++ keywords in C as being incompatible with C++. if (!getLangOpts().CPlusPlus && + !Diags.isIgnored(diag::warn_identifier_is_cpp_keyword, + D->getLocation()) && isKeywordInCPlusPlus(*this, D->getIdentifier())) Diag(D->getLocation(), diag::warn_identifier_is_cpp_keyword) << D; } >From 972a5fad97242c673aa17ca9999dfa5044ef2320 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Fri, 25 Apr 2025 07:40:51 -0400 Subject: [PATCH 06/16] Add test cases for override and final --- clang/test/Sema/c++-keyword-in-c.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/test/Sema/c++-keyword-in-c.c b/clang/test/Sema/c++-keyword-in-c.c index 364f115ddcb44..519c7f1c2f430 100644 --- a/clang/test/Sema/c++-keyword-in-c.c +++ b/clang/test/Sema/c++-keyword-in-c.c @@ -197,6 +197,8 @@ void func(int private); // expected-warning {{identifier 'private' conflicts wit // diagnosed as a keyword. int module; int import; +int override; +int final; // We do not diagnose use of C++ keywords when used as a macro name because // that does not conflict with C++ (the macros will be replaced before the >From 81ac3ce2f73a2750bbab3454fb81f544efec4122 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Fri, 25 Apr 2025 07:50:44 -0400 Subject: [PATCH 07/16] Handle alternative keywords --- clang/lib/Basic/IdentifierTable.cpp | 1 + clang/test/Sema/c++-keyword-in-c.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index a8b64364a1745..95c4d34f65a7b 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -347,6 +347,7 @@ static KeywordStatus getNameKwStatus(const LangOptions &LangOpts, StringRef Name) { return llvm::StringSwitch<KeywordStatus>(Name) #define KEYWORD(NAME, FLAGS) .Case(#NAME, getKeywordStatus(LangOpts, FLAGS)) +#define CXX_KEYWORD_OPERATOR(NAME, TOK) .Case(#NAME, KS_Enabled) #include "clang/Basic/TokenKinds.def" .Default(KS_Disabled); } diff --git a/clang/test/Sema/c++-keyword-in-c.c b/clang/test/Sema/c++-keyword-in-c.c index 519c7f1c2f430..c573c4cd22cb7 100644 --- a/clang/test/Sema/c++-keyword-in-c.c +++ b/clang/test/Sema/c++-keyword-in-c.c @@ -213,3 +213,27 @@ struct T { int decltype; // expected-warning {{identifier 'decltype' conflicts with a C++ keyword}} }; #endif // __cplusplus + +// Check alternative operator names. +int and; // expected-warning {{identifier 'and' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int and_eq; // expected-warning {{identifier 'and_eq' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int bitand; // expected-warning {{identifier 'bitand' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int bitor; // expected-warning {{identifier 'bitor' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int compl; // expected-warning {{identifier 'compl' conflicts with a C++ keyword}} \ + cxx-error {{expected a class name after '~' to name a destructor}} +int not; // expected-warning {{identifier 'not' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int not_eq; // expected-warning {{identifier 'not_eq' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int or; // expected-warning {{identifier 'or' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int or_eq; // expected-warning {{identifier 'or_eq' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int xor; // expected-warning {{identifier 'xor' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} +int xor_eq; // expected-warning {{identifier 'xor_eq' conflicts with a C++ keyword}} \ + cxx-error {{expected unqualified-id}} >From 6c5d838cdb103e80163e5b32cca978dfb21ae067 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Fri, 25 Apr 2025 07:51:54 -0400 Subject: [PATCH 08/16] Rename flag --- clang/docs/ReleaseNotes.rst | 5 ++--- clang/include/clang/Basic/DiagnosticGroups.td | 2 +- clang/test/Sema/c++-keyword-in-c.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 453bdc9870bdb..413db1c44fa01 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -143,9 +143,8 @@ C Language Changes - Added ``-Wimplicit-void-ptr-cast``, grouped under ``-Wc++-compat``, which diagnoses implicit conversion from ``void *`` to another pointer type as being incompatible with C++. (#GH17792) -- Added ``-Widentifier-is-c++-keyword``, grouped under ``-Wc++-compat``, which - diagnoses when a C++ keyword is used as an identifier in C. Partially - addresses #GH21898. +- Added ``-Wc++-keyword``, grouped under ``-Wc++-compat``, which diagnoses when + a C++ keyword is used as an identifier in C. Partially addresses #GH21898. C2y Feature Support ^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 84e590a857398..6f40c69a8b405 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -155,7 +155,7 @@ def C99Compat : DiagGroup<"c99-compat">; def C23Compat : DiagGroup<"c23-compat">; def : DiagGroup<"c2x-compat", [C23Compat]>; -def CppKeywordInC : DiagGroup<"identifier-is-c++-keyword">; +def CppKeywordInC : DiagGroup<"c++-keyword">; def ImplicitVoidPtrCast : DiagGroup<"implicit-void-ptr-cast">; def CXXCompat: DiagGroup<"c++-compat", [ImplicitVoidPtrCast, CppKeywordInC]>; def ExternCCompat : DiagGroup<"extern-c-compat">; diff --git a/clang/test/Sema/c++-keyword-in-c.c b/clang/test/Sema/c++-keyword-in-c.c index c573c4cd22cb7..f417469441114 100644 --- a/clang/test/Sema/c++-keyword-in-c.c +++ b/clang/test/Sema/c++-keyword-in-c.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -Widentifier-is-c++-keyword %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wc++-keyword %s // RUN: %clang_cc1 -fsyntax-only -verify -Wc++-compat %s // RUN: %clang_cc1 -fsyntax-only -verify=good %s // RUN: %clang_cc1 -fsyntax-only -verify=cxx -x c++ -std=c++2c %s >From 20dcdae0b8021b3bc0c6d2ada87230b5ecd2374b Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Fri, 25 Apr 2025 07:54:44 -0400 Subject: [PATCH 09/16] Temporarily back this change out I want to see if I can improve the speed before we conditionally enable the check. --- clang/lib/Sema/SemaDecl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 289a2dd3bae6e..12cceb9141933 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6138,8 +6138,9 @@ void Sema::warnOnReservedIdentifier(const NamedDecl *D) { } // Diagnose use of C++ keywords in C as being incompatible with C++. if (!getLangOpts().CPlusPlus && - !Diags.isIgnored(diag::warn_identifier_is_cpp_keyword, +/* !Diags.isIgnored(diag::warn_identifier_is_cpp_keyword, D->getLocation()) && +*/ isKeywordInCPlusPlus(*this, D->getIdentifier())) Diag(D->getLocation(), diag::warn_identifier_is_cpp_keyword) << D; } >From 07e44e05f85a2a42a00ab18fbb2589ee87af184d Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Fri, 25 Apr 2025 08:20:16 -0400 Subject: [PATCH 10/16] Remove the need for string comparisons If this ends up performing significantly better, I'll rip out the changes to IdentifierTable. --- clang/include/clang/Basic/TokenKinds.def | 1 + clang/lib/Sema/SemaDecl.cpp | 29 +++++++++++++++--------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 868e851342eb8..fb53d88deea4a 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -1042,6 +1042,7 @@ ANNOTATION(embed) #undef TYPE_TRAIT_2 #undef TYPE_TRAIT_1 #undef TYPE_TRAIT +#undef MODULES_KEYWORD #undef CXX20_KEYWORD #undef CXX11_KEYWORD #undef KEYWORD diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 12cceb9141933..ef24ff2e3146a 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -59,6 +59,7 @@ #include "clang/Sema/SemaWasm.h" #include "clang/Sema/Template.h" #include "llvm/ADT/STLForwardCompat.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -6112,17 +6113,23 @@ static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo *II) { if (!II) return false; - // Clear all the C language options, set all the C++ language options, but - // otherwise leave all the defaults alone. This isn't ideal because some - // feature flags are language-specific, but this is a reasonable - // approximation. - LangOptions LO = S.getLangOpts(); - LO.CPlusPlus = LO.CPlusPlus11 = LO.CPlusPlus14 = LO.CPlusPlus17 = - LO.CPlusPlus20 = LO.CPlusPlus23 = LO.CPlusPlus26 = 1; - LO.C99 = LO.C11 = LO.C17 = LO.C23 = LO.C2y = 0; - LO.Char8 = 1; // Presume this is always a keyword in C++. - LO.WChar = 1; // Presume this is always a keyword in C++. - return II->isNameKeyword(LO); + // Build a static map of identifiers for all of the keywords in C++ that are + // not keywords in C. This allows us to do pointer comparisons instead of + // string comparisons when deciding whether the given identifier is a keyword + // or not. Note, this treats all keywords as being enabled, regardless of the + // setting of other language options. It intentionally disables the modules + // keywords because those are conditional keywords, so may be safe to use. + static llvm::SmallPtrSet<IdentifierInfo *, 32> Keywords; + if (Keywords.empty()) { +#define MODULES_KEYWORD(NAME) +#define KEYWORD(NAME, FLAGS) \ + Keywords.insert(&S.getPreprocessor().getIdentifierTable().get(#NAME)); +#define CXX_KEYWORD_OPERATOR(NAME, TOK) \ + Keywords.insert(&S.getPreprocessor().getIdentifierTable().get(#NAME)); +#include "clang/Basic/TokenKinds.def" + } + + return Keywords.contains(II); } void Sema::warnOnReservedIdentifier(const NamedDecl *D) { >From c8b6f485354029957e448069a9096170558747bb Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Fri, 25 Apr 2025 09:50:30 -0400 Subject: [PATCH 11/16] Remove old implementation, go back to checking only when enabled --- clang/include/clang/Basic/IdentifierTable.h | 11 +++------- clang/lib/Basic/IdentifierTable.cpp | 24 --------------------- clang/lib/Sema/SemaDecl.cpp | 3 +-- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index 05c989c1b07d9..1275b056227b5 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -444,18 +444,13 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { } bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } - /// Return true if this identifier uses a keyword token and is a keyword in - /// the specified language. + /// Return true if this token is a keyword in the specified language. bool isKeyword(const LangOptions &LangOpts) const; - /// Return true if this identifier uses a keyword token and is a C++ keyword - /// in the specified language. + /// Return true if this token is a C++ keyword in the specified + /// language. bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; - /// Returns true if the name of this identifier matches a keyword given the - /// specified language options. - bool isNameKeyword(const LangOptions &LangOpts) const; - /// Get and set FETokenInfo. The language front-end is allowed to associate /// arbitrary metadata with this token. void *getFETokenInfo() const { return FETokenInfo; } diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 95c4d34f65a7b..16151c94464f9 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -343,15 +343,6 @@ static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts, } } -static KeywordStatus getNameKwStatus(const LangOptions &LangOpts, - StringRef Name) { - return llvm::StringSwitch<KeywordStatus>(Name) -#define KEYWORD(NAME, FLAGS) .Case(#NAME, getKeywordStatus(LangOpts, FLAGS)) -#define CXX_KEYWORD_OPERATOR(NAME, TOK) .Case(#NAME, KS_Enabled) -#include "clang/Basic/TokenKinds.def" - .Default(KS_Disabled); -} - /// Returns true if the identifier represents a keyword in the /// specified language. bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const { @@ -364,21 +355,6 @@ bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const { } } -bool IdentifierInfo::isNameKeyword(const LangOptions &LangOpts) const { - // This differs from IdentifierInfo::isCPlusPlusKeyword(). That function - // tests if the identifier is a keyword token in C++ mode and then isn't a - // keyword token in C modes. In our case, if it was a keyword, we wouldn't - // have gotten the identifier for it anyway, so that function will always - // return false for us. Instead, check against the identifier name directly. - switch (getNameKwStatus(LangOpts, getName())) { - case KS_Enabled: - case KS_Extension: - return true; - default: - return false; - } -} - /// Returns true if the identifier represents a C++ keyword in the /// specified language. bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index c3924d3ac7690..84f5eaf7d396f 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6145,9 +6145,8 @@ void Sema::warnOnReservedIdentifier(const NamedDecl *D) { } // Diagnose use of C++ keywords in C as being incompatible with C++. if (!getLangOpts().CPlusPlus && -/* !Diags.isIgnored(diag::warn_identifier_is_cpp_keyword, + !Diags.isIgnored(diag::warn_identifier_is_cpp_keyword, D->getLocation()) && -*/ isKeywordInCPlusPlus(*this, D->getIdentifier())) Diag(D->getLocation(), diag::warn_identifier_is_cpp_keyword) << D; } >From 6bd0c2f16198554e9bafe4ce8070b5b81be1fc5a Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Mon, 28 Apr 2025 12:27:16 -0400 Subject: [PATCH 12/16] Update based on review feedback * Switch to using a binary_search * Remove an unnecessary check for empty() --- clang/lib/Sema/SemaDecl.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 82eac95cd39de..c91e3b23c3da6 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6119,17 +6119,20 @@ static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo *II) { // or not. Note, this treats all keywords as being enabled, regardless of the // setting of other language options. It intentionally disables the modules // keywords because those are conditional keywords, so may be safe to use. - static llvm::SmallPtrSet<IdentifierInfo *, 32> Keywords; + static llvm::SmallVector<uintptr_t, 48> Keywords; if (Keywords.empty()) { #define MODULES_KEYWORD(NAME) #define KEYWORD(NAME, FLAGS) \ - Keywords.insert(&S.getPreprocessor().getIdentifierTable().get(#NAME)); + Keywords.push_back(reinterpret_cast<uint64_t>( \ + &S.getPreprocessor().getIdentifierTable().get(#NAME))); #define CXX_KEYWORD_OPERATOR(NAME, TOK) \ - Keywords.insert(&S.getPreprocessor().getIdentifierTable().get(#NAME)); + Keywords.push_back(reinterpret_cast<uint64_t>( \ + &S.getPreprocessor().getIdentifierTable().get(#NAME))); #include "clang/Basic/TokenKinds.def" + llvm::sort(Keywords); } - return Keywords.contains(II); + return llvm::binary_search(Keywords, reinterpret_cast<uintptr_t>(II)); } void Sema::warnOnReservedIdentifier(const NamedDecl *D) { @@ -10451,7 +10454,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, // will not be pushed onto a scope chain. That means we will not issue any // reserved identifier warnings for the declaration, but we will for the // definition. Handle those here. - if (!Params.empty() && !D.isFunctionDefinition()) { + if (!D.isFunctionDefinition()) { for (const ParmVarDecl *PVD : Params) warnOnReservedIdentifier(PVD); } >From b07e0fddb4a5f01d8dc223e7c950b7cdbbf7a250 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Mon, 28 Apr 2025 13:03:36 -0400 Subject: [PATCH 13/16] One more iteration This one uses std::array so the size is exact. --- clang/lib/Sema/SemaDecl.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index c91e3b23c3da6..1aef8de89981c 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6109,6 +6109,15 @@ static bool isFromSystemHeader(SourceManager &SM, const Decl *D) { SM.isInSystemMacro(D->getLocation()); } +constexpr unsigned countCPlusPlusKeywords() { + unsigned Ret = 0; +#define MODULES_KEYWORD(NAME) +#define KEYWORD(NAME, FLAGS) ++Ret; +#define CXX_KEYWORD_OPERATOR(NAME, TOK) ++Ret; +#include "clang/Basic/TokenKinds.def" + return Ret; +} + static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo *II) { if (!II) return false; @@ -6119,16 +6128,18 @@ static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo *II) { // or not. Note, this treats all keywords as being enabled, regardless of the // setting of other language options. It intentionally disables the modules // keywords because those are conditional keywords, so may be safe to use. - static llvm::SmallVector<uintptr_t, 48> Keywords; - if (Keywords.empty()) { + static std::array<uintptr_t, countCPlusPlusKeywords()> Keywords = {}; + if (!Keywords[0]) { + unsigned Idx = 0; #define MODULES_KEYWORD(NAME) #define KEYWORD(NAME, FLAGS) \ - Keywords.push_back(reinterpret_cast<uint64_t>( \ - &S.getPreprocessor().getIdentifierTable().get(#NAME))); + Keywords[Idx++] = reinterpret_cast<uint64_t>( \ + &S.getPreprocessor().getIdentifierTable().get(#NAME)); #define CXX_KEYWORD_OPERATOR(NAME, TOK) \ - Keywords.push_back(reinterpret_cast<uint64_t>( \ - &S.getPreprocessor().getIdentifierTable().get(#NAME))); + Keywords[Idx++] = reinterpret_cast<uint64_t>( \ + &S.getPreprocessor().getIdentifierTable().get(#NAME)); #include "clang/Basic/TokenKinds.def" + assert(Idx == Keywords.size() && "expected to fill every member!"); llvm::sort(Keywords); } >From 5bf400674dc70a4f0cd70bd713e79a5fb48d3f06 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Mon, 28 Apr 2025 16:58:29 -0400 Subject: [PATCH 14/16] Change initialization; NFC --- clang/lib/Sema/SemaDecl.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 1aef8de89981c..5388672257689 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6128,20 +6128,21 @@ static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo *II) { // or not. Note, this treats all keywords as being enabled, regardless of the // setting of other language options. It intentionally disables the modules // keywords because those are conditional keywords, so may be safe to use. - static std::array<uintptr_t, countCPlusPlusKeywords()> Keywords = {}; - if (!Keywords[0]) { + static auto Keywords = [&S] { + std::array<uintptr_t, countCPlusPlusKeywords()> Ret; unsigned Idx = 0; #define MODULES_KEYWORD(NAME) #define KEYWORD(NAME, FLAGS) \ - Keywords[Idx++] = reinterpret_cast<uint64_t>( \ + Ret[Idx++] = reinterpret_cast<uint64_t>( \ &S.getPreprocessor().getIdentifierTable().get(#NAME)); #define CXX_KEYWORD_OPERATOR(NAME, TOK) \ - Keywords[Idx++] = reinterpret_cast<uint64_t>( \ + Ret[Idx++] = reinterpret_cast<uint64_t>( \ &S.getPreprocessor().getIdentifierTable().get(#NAME)); #include "clang/Basic/TokenKinds.def" - assert(Idx == Keywords.size() && "expected to fill every member!"); - llvm::sort(Keywords); - } + assert(Idx == Ret.size() && "expected to fill every member!"); + llvm::sort(Ret); + return Ret; + }(); return llvm::binary_search(Keywords, reinterpret_cast<uintptr_t>(II)); } >From d0551cad178ed28539c416e581125d9a3525f321 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Tue, 29 Apr 2025 07:53:25 -0400 Subject: [PATCH 15/16] Go with another implementation of the keyword logic --- clang/include/clang/Basic/IdentifierTable.h | 12 +++++- clang/lib/Basic/IdentifierTable.cpp | 42 +++++++++++++++++--- clang/lib/Sema/SemaDecl.cpp | 44 +-------------------- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index 1275b056227b5..1a9186cc6c637 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -195,7 +195,11 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsFinal : 1; - // 22 bits left in a 64-bit word. + // True if this identifier would be a keyword in C++ mode. + LLVM_PREFERRED_TYPE(bool) + unsigned IsKeywordInCpp : 1; + + // 21 bits left in a 64-bit word. // Managed by the language front-end. void *FETokenInfo = nullptr; @@ -212,7 +216,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), - IsRestrictExpansion(false), IsFinal(false) {} + IsRestrictExpansion(false), IsFinal(false), IsKeywordInCpp(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -444,6 +448,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { } bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } + /// Return true if this identifier would be a keyword in C++ mode. + bool IsKeywordInCPlusPlus() const { return IsKeywordInCpp; } + void setIsKeywordInCPlusPlus(bool Val = true) { IsKeywordInCpp = Val; } + /// Return true if this token is a keyword in the specified language. bool isKeyword(const LangOptions &LangOpts) const; diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 16151c94464f9..6610cd9ac40d6 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -250,6 +250,31 @@ static KeywordStatus getKeywordStatus(const LangOptions &LangOpts, return CurStatus; } +static bool IsKeywordInCpp(unsigned Flags) { + while (Flags != 0) { + unsigned CurFlag = Flags & ~(Flags - 1); + Flags = Flags & ~CurFlag; + switch (static_cast<TokenKey>(CurFlag)) { + case KEYCXX: + case KEYCXX11: + case KEYCXX20: + case BOOLSUPPORT: + case WCHARSUPPORT: + case CHAR8SUPPORT: + return true; + default: + break; // Go to the next flag, try again. + } + } + return false; +} + +static void MarkIdentifierAsKeywordInCpp(IdentifierTable &Table, + StringRef Name) { + IdentifierInfo &II = Table.get(Name, tok::identifier); + II.setIsKeywordInCPlusPlus(); +} + /// AddKeyword - This method is used to associate a token ID with specific /// identifiers because they are language keywords. This causes the lexer to /// automatically map matching identifiers to specialized token codes. @@ -258,8 +283,13 @@ static void AddKeyword(StringRef Keyword, const LangOptions &LangOpts, IdentifierTable &Table) { KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags); - // Don't add this keyword if disabled in this language. - if (AddResult == KS_Disabled) return; + // Don't add this keyword if disabled in this language and isn't otherwise + // special. + if (AddResult == KS_Disabled) { + if (IsKeywordInCpp(Flags)) + MarkIdentifierAsKeywordInCpp(Table, Keyword); + return; + } IdentifierInfo &Info = Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode); @@ -304,9 +334,11 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { #define ALIAS(NAME, TOK, FLAGS) \ AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \ FLAGS, LangOpts, *this); -#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ - if (LangOpts.CXXOperatorNames) \ - AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); +#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ + if (LangOpts.CXXOperatorNames) \ + AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); \ + else \ + MarkIdentifierAsKeywordInCpp(*this, StringRef(#NAME)); #define OBJC_AT_KEYWORD(NAME) \ if (LangOpts.ObjC) \ AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 605519ed013fa..e8c254b7aa1d9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6109,44 +6109,6 @@ static bool isFromSystemHeader(SourceManager &SM, const Decl *D) { SM.isInSystemMacro(D->getLocation()); } -constexpr unsigned countCPlusPlusKeywords() { - unsigned Ret = 0; -#define MODULES_KEYWORD(NAME) -#define KEYWORD(NAME, FLAGS) ++Ret; -#define CXX_KEYWORD_OPERATOR(NAME, TOK) ++Ret; -#include "clang/Basic/TokenKinds.def" - return Ret; -} - -static bool isKeywordInCPlusPlus(const Sema &S, const IdentifierInfo *II) { - if (!II) - return false; - - // Build a static map of identifiers for all of the keywords in C++ that are - // not keywords in C. This allows us to do pointer comparisons instead of - // string comparisons when deciding whether the given identifier is a keyword - // or not. Note, this treats all keywords as being enabled, regardless of the - // setting of other language options. It intentionally disables the modules - // keywords because those are conditional keywords, so may be safe to use. - static auto Keywords = [&S] { - std::array<uintptr_t, countCPlusPlusKeywords()> Ret; - unsigned Idx = 0; -#define MODULES_KEYWORD(NAME) -#define KEYWORD(NAME, FLAGS) \ - Ret[Idx++] = reinterpret_cast<uint64_t>( \ - &S.getPreprocessor().getIdentifierTable().get(#NAME)); -#define CXX_KEYWORD_OPERATOR(NAME, TOK) \ - Ret[Idx++] = reinterpret_cast<uint64_t>( \ - &S.getPreprocessor().getIdentifierTable().get(#NAME)); -#include "clang/Basic/TokenKinds.def" - assert(Idx == Ret.size() && "expected to fill every member!"); - llvm::sort(Ret); - return Ret; - }(); - - return llvm::binary_search(Keywords, reinterpret_cast<uintptr_t>(II)); -} - void Sema::warnOnReservedIdentifier(const NamedDecl *D) { // Avoid warning twice on the same identifier, and don't warn on redeclaration // of system decl. @@ -6159,10 +6121,8 @@ void Sema::warnOnReservedIdentifier(const NamedDecl *D) { << D << static_cast<int>(Status); } // Diagnose use of C++ keywords in C as being incompatible with C++. - if (!getLangOpts().CPlusPlus && - !Diags.isIgnored(diag::warn_identifier_is_cpp_keyword, - D->getLocation()) && - isKeywordInCPlusPlus(*this, D->getIdentifier())) + if (const IdentifierInfo *II = D->getIdentifier(); + II && !getLangOpts().CPlusPlus && II->IsKeywordInCPlusPlus()) Diag(D->getLocation(), diag::warn_identifier_is_cpp_keyword) << D; } >From 16594b355f878b7b9a92a2e522d26fefe74743c8 Mon Sep 17 00:00:00 2001 From: Aaron Ballman <aa...@aaronballman.com> Date: Tue, 29 Apr 2025 13:33:14 -0400 Subject: [PATCH 16/16] Implement in the preprocessor rather than Sema --- clang/include/clang/Basic/DiagnosticLexKinds.td | 3 +++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 --- clang/include/clang/Basic/IdentifierTable.h | 1 + clang/lib/Basic/IdentifierTable.cpp | 1 + clang/lib/Lex/Preprocessor.cpp | 5 +++++ clang/lib/Sema/SemaDecl.cpp | 4 ---- 6 files changed, 10 insertions(+), 7 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index f29edfa835d42..a3c32107596f0 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -421,6 +421,9 @@ def warn_pp_macro_is_reserved_attribute_id : Warning< def warn_pp_objc_macro_redef_ignored : Warning< "ignoring redefinition of Objective-C qualifier macro">, InGroup<DiagGroup<"objc-macro-redefinition">>; +def warn_pp_identifier_is_cpp_keyword : Warning< + "identifier %0 conflicts with a C++ keyword">, + InGroup<CppKeywordInC>, DefaultIgnore; def pp_invalid_string_literal : Warning< "invalid string literal, ignoring final '\\'">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f22e91f394a43..ad5bf26be2590 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -496,9 +496,6 @@ def warn_unused_lambda_capture: Warning<"lambda capture %0 is not " "%select{used|required to be captured for this use}1">, InGroup<UnusedLambdaCapture>, DefaultIgnore; -def warn_identifier_is_cpp_keyword : Warning< - "identifier %0 conflicts with a C++ keyword">, - InGroup<CppKeywordInC>, DefaultIgnore; def warn_decl_hidden_in_cpp : Warning< "%select{struct|union|enum}0 defined within a struct or union is not visible " "in C++">, InGroup<HiddenCppDecl>, DefaultIgnore; diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index 1a9186cc6c637..54540193cfcc0 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -470,6 +470,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { /// If this returns false, we know that HandleIdentifier will not affect /// the token. bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } + void setHandleIdentifierCase(bool Val = true) { NeedsHandleIdentifier = Val; } /// Return true if the identifier in its current state was loaded /// from an AST file. diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 6610cd9ac40d6..b9f5fec46f868 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -273,6 +273,7 @@ static void MarkIdentifierAsKeywordInCpp(IdentifierTable &Table, StringRef Name) { IdentifierInfo &II = Table.get(Name, tok::identifier); II.setIsKeywordInCPlusPlus(); + II.setHandleIdentifierCase(); } /// AddKeyword - This method is used to associate a token ID with specific diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 4c050bf1f5bb2..9ea7b95622c76 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -834,6 +834,11 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { II.setIsFutureCompatKeyword(false); } + // If this identifier would be a keyword in C++, diagnose as a compatibility + // issue. + if (II.IsKeywordInCPlusPlus() && !DisableMacroExpansion) + Diag(Identifier, diag::warn_pp_identifier_is_cpp_keyword) << &II; + // If this is an extension token, diagnose its use. // We avoid diagnosing tokens that originate from macro definitions. // FIXME: This warning is disabled in cases where it shouldn't be, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index a58d4bf62eb9b..6abaacd3a8410 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6120,10 +6120,6 @@ void Sema::warnOnReservedIdentifier(const NamedDecl *D) { Diag(D->getLocation(), diag::warn_reserved_extern_symbol) << D << static_cast<int>(Status); } - // Diagnose use of C++ keywords in C as being incompatible with C++. - if (const IdentifierInfo *II = D->getIdentifier(); - II && !getLangOpts().CPlusPlus && II->IsKeywordInCPlusPlus()) - Diag(D->getLocation(), diag::warn_identifier_is_cpp_keyword) << D; } Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits