https://github.com/Endilll updated https://github.com/llvm/llvm-project/pull/71709
>From 2823d38544d18213b5bf48c67e4eedd52acce850 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov <serebrennikov.vladis...@gmail.com> Date: Wed, 8 Nov 2023 20:30:37 +0300 Subject: [PATCH 1/4] [clang] Refactor `IdentifierInfo::ObjcOrBuiltinID` This patch refactors how values are stored inside `IdentifierInfo::ObjcOrBuiltinID` bit-field, and annotates it with `preferred_type`. In order to make the value easier to interpret by debuggers, a new `ObjCKeywordOrInterestingOrBuiltin` is added. Previous "layout" of this fields couldn't be represented with this new enum, because it skipped over some arbitrary enumerators, so a new "layout" was invented based on `ObjCKeywordOrInterestingOrBuiltin` enum. I believe the new layout is simpler than the new one. --- clang/include/clang/Basic/IdentifierTable.h | 117 ++++++++++++-------- 1 file changed, 73 insertions(+), 44 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index 0898e7d39dd7de..fa76228da2b143 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H +#include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticIDs.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/TokenKinds.h" @@ -86,19 +87,26 @@ enum { IdentifierInfoAlignment = 8 }; static constexpr int ObjCOrBuiltinIDBits = 16; /// The "layout" of ObjCOrBuiltinID is: -/// - The first value (0) represents "not a special identifier". -/// - The next (NUM_OBJC_KEYWORDS - 1) values represent ObjCKeywordKinds (not -/// including objc_not_keyword). -/// - The next (NUM_INTERESTING_IDENTIFIERS - 1) values represent -/// InterestingIdentifierKinds (not including not_interesting). -/// - The rest of the values represent builtin IDs (not including NotBuiltin). -static constexpr int FirstObjCKeywordID = 1; -static constexpr int LastObjCKeywordID = - FirstObjCKeywordID + tok::NUM_OBJC_KEYWORDS - 2; -static constexpr int FirstInterestingIdentifierID = LastObjCKeywordID + 1; -static constexpr int LastInterestingIdentifierID = - FirstInterestingIdentifierID + tok::NUM_INTERESTING_IDENTIFIERS - 2; -static constexpr int FirstBuiltinID = LastInterestingIdentifierID + 1; +/// - ObjCKeywordKind enumerators +/// - InterestingIdentifierKind enumerators +/// - Builtin::ID enumerators +/// - NonSpecialIdentifier +enum class ObjCKeywordOrInterestingOrBuiltin { +#define OBJC_AT_KEYWORD(X) objc_##X, +#include "clang/Basic/TokenKinds.def" + NUM_OBJC_KEYWORDS, + +#define INTERESTING_IDENTIFIER(X) X, +#include "clang/Basic/TokenKinds.def" + NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS, + + NotBuiltin, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/Basic/Builtins.def" + FirstTSBuiltin, + + NonSpecialIdentifier = 65534 +}; /// One of these records is kept for each identifier that /// is lexed. This contains information about whether the token was \#define'd, @@ -113,9 +121,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(tok::TokenKind) unsigned TokenID : 9; - // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf). - // First NUM_OBJC_KEYWORDS values are for Objective-C, - // the remaining values are for builtins. + LLVM_PREFERRED_TYPE(ObjCKeywordOrInterestingOrBuiltin) unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits; // True if there is a #define for this. @@ -198,13 +204,16 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr; IdentifierInfo() - : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false), - HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false), - IsPoisoned(false), IsCPPOperatorKeyword(false), - NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), - FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), - IsModulesImport(false), IsMangledOpenMPVariantName(false), - IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {} + : TokenID(tok::identifier), + ObjCOrBuiltinID(llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)), + HasMacro(false), HadMacro(false), IsExtension(false), + IsFutureCompatKeyword(false), IsPoisoned(false), + IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), + IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), + RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), + IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), + IsRestrictExpansion(false), IsFinal(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -332,42 +341,62 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { /// /// For example, 'class' will return tok::objc_class if ObjC is enabled. tok::ObjCKeywordKind getObjCKeywordID() const { - static_assert(FirstObjCKeywordID == 1, - "hard-coding this assumption to simplify code"); - if (ObjCOrBuiltinID <= LastObjCKeywordID) - return tok::ObjCKeywordKind(ObjCOrBuiltinID); - else - return tok::objc_not_keyword; + auto Value = + static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID); + if (Value < ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS) + return static_cast<tok::ObjCKeywordKind>(ObjCOrBuiltinID); + return tok::objc_not_keyword; + } + void setObjCKeywordID(tok::ObjCKeywordKind ID) { + ObjCOrBuiltinID = ID; + assert(getObjCKeywordID() == ID && "ID too large for field!"); } - void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } /// Return a value indicating whether this is a builtin function. - /// - /// 0 is not-built-in. 1+ are specific builtin functions. unsigned getBuiltinID() const { - if (ObjCOrBuiltinID >= FirstBuiltinID) - return 1 + (ObjCOrBuiltinID - FirstBuiltinID); - else - return 0; + auto Value = + static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID); + if (Value > ObjCKeywordOrInterestingOrBuiltin:: + NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS && + Value != ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier) + return static_cast<Builtin::ID>( + ObjCOrBuiltinID - 1 - + llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin:: + NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS)); + return Builtin::ID::NotBuiltin; } void setBuiltinID(unsigned ID) { - assert(ID != 0); - ObjCOrBuiltinID = FirstBuiltinID + (ID - 1); + assert(ID != Builtin::ID::NotBuiltin); + ObjCOrBuiltinID = + ID + 1 + + llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin:: + NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS); assert(getBuiltinID() == ID && "ID too large for field!"); } - void clearBuiltinID() { ObjCOrBuiltinID = 0; } + void clearBuiltinID() { + ObjCOrBuiltinID = llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier); + } tok::InterestingIdentifierKind getInterestingIdentifierID() const { - if (ObjCOrBuiltinID >= FirstInterestingIdentifierID && - ObjCOrBuiltinID <= LastInterestingIdentifierID) - return tok::InterestingIdentifierKind( - 1 + (ObjCOrBuiltinID - FirstInterestingIdentifierID)); + auto Value = + static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID); + if (Value > ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS && + Value < ObjCKeywordOrInterestingOrBuiltin:: + NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS) + return static_cast<tok::InterestingIdentifierKind>( + ObjCOrBuiltinID - 1 - + llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS)); else return tok::not_interesting; } void setInterestingIdentifierID(unsigned ID) { assert(ID != tok::not_interesting); - ObjCOrBuiltinID = FirstInterestingIdentifierID + (ID - 1); + ObjCOrBuiltinID = ID + 1 + + llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS); assert(getInterestingIdentifierID() == ID && "ID too large for field!"); } >From 4d76f2d881fedd66d63f3c0b4d9feadd75004306 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov <serebrennikov.vladis...@gmail.com> Date: Thu, 16 Nov 2023 14:45:37 +0300 Subject: [PATCH 2/4] Fix checking `getObjCOrBuiltinID()` against zero --- clang/lib/Serialization/ASTReader.cpp | 6 +++++- clang/lib/Serialization/ASTWriter.cpp | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 42b48d230af7a9..5b6244cf6a596f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -988,7 +988,11 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) { static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II, bool IsModule) { return II.hadMacroDefinition() || II.isPoisoned() || - (!IsModule && II.getObjCOrBuiltinID()) || + (!IsModule && + II.getInterestingIdentifierID() != + tok::InterestingIdentifierKind::not_interesting && + II.getBuiltinID() != Builtin::ID::NotBuiltin && + II.getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) || II.hasRevertedTokenIDToIdentifier() || (!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) && II.getFETokenInfo()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 1e86566d81fbc0..ee2b77c633ad47 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3575,7 +3575,11 @@ class ASTIdentifierTableTrait { /// to check that. bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) { if (MacroOffset || II->isPoisoned() || - (!IsModule && II->getObjCOrBuiltinID()) || + (!IsModule && + II->getInterestingIdentifierID() != + tok::InterestingIdentifierKind::not_interesting && + II->getBuiltinID() != Builtin::ID::NotBuiltin && + II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) || II->hasRevertedTokenIDToIdentifier() || (NeedDecls && II->getFETokenInfo())) return true; >From 6811d5067beea7e1df2b5834916e4e12720763fd Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov <serebrennikov.vladis...@gmail.com> Date: Fri, 17 Nov 2023 13:11:51 +0300 Subject: [PATCH 3/4] Simplify `isInterestingIdentifier` implementation --- clang/lib/Serialization/ASTReader.cpp | 7 ++----- clang/lib/Serialization/ASTWriter.cpp | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 5b6244cf6a596f..5fd0a3f5532204 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -988,11 +988,8 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) { static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II, bool IsModule) { return II.hadMacroDefinition() || II.isPoisoned() || - (!IsModule && - II.getInterestingIdentifierID() != - tok::InterestingIdentifierKind::not_interesting && - II.getBuiltinID() != Builtin::ID::NotBuiltin && - II.getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) || + (!IsModule && II.getInterestingIdentifierID() != + tok::InterestingIdentifierKind::not_interesting) || II.hasRevertedTokenIDToIdentifier() || (!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) && II.getFETokenInfo()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ee2b77c633ad47..1a9963ec674b3c 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3575,11 +3575,8 @@ class ASTIdentifierTableTrait { /// to check that. bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) { if (MacroOffset || II->isPoisoned() || - (!IsModule && - II->getInterestingIdentifierID() != - tok::InterestingIdentifierKind::not_interesting && - II->getBuiltinID() != Builtin::ID::NotBuiltin && - II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) || + (!IsModule && II->getInterestingIdentifierID() != + tok::InterestingIdentifierKind::not_interesting) || II->hasRevertedTokenIDToIdentifier() || (NeedDecls && II->getFETokenInfo())) return true; >From 93007233a27e84d26c16c6cc1241127777427d69 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov <serebrennikov.vladis...@gmail.com> Date: Sat, 10 Feb 2024 22:03:13 +0300 Subject: [PATCH 4/4] Fix boolean logic in serialization Also do refactoing in IdentifierTable.h --- clang/include/clang/Basic/IdentifierTable.h | 44 +++++++++++---------- clang/lib/Serialization/ASTReader.cpp | 9 +++-- clang/lib/Serialization/ASTWriter.cpp | 10 +++-- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index 1fc091b58a49ab..fa8969eb73ddbf 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -102,7 +102,7 @@ enum class ObjCKeywordOrInterestingOrBuiltin { NotBuiltin, #define BUILTIN(ID, TYPE, ATTRS) BI##ID, -#include "clang/Basic/Builtins.def" +#include "clang/Basic/Builtins.inc" FirstTSBuiltin, NonSpecialIdentifier = 65534 @@ -341,6 +341,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { /// /// For example, 'class' will return tok::objc_class if ObjC is enabled. tok::ObjCKeywordKind getObjCKeywordID() const { + assert(0 == llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::objc_not_keyword)); auto Value = static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID); if (Value < ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS) @@ -348,6 +350,8 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { return tok::objc_not_keyword; } void setObjCKeywordID(tok::ObjCKeywordKind ID) { + assert(0 == llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::objc_not_keyword)); ObjCOrBuiltinID = ID; assert(getObjCKeywordID() == ID && "ID too large for field!"); } @@ -358,20 +362,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID); if (Value > ObjCKeywordOrInterestingOrBuiltin:: NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS && - Value != ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier) - return static_cast<Builtin::ID>( - ObjCOrBuiltinID - 1 - - llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin:: - NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS)); + Value != ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier) { + auto FirstBuiltin = + llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin::NotBuiltin); + return static_cast<Builtin::ID>(ObjCOrBuiltinID - FirstBuiltin); + } return Builtin::ID::NotBuiltin; } void setBuiltinID(unsigned ID) { assert(ID != Builtin::ID::NotBuiltin); - ObjCOrBuiltinID = - ID + 1 + - llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin:: - NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS); + auto FirstBuiltin = + llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin::NotBuiltin); + ObjCOrBuiltinID = ID + FirstBuiltin; assert(getBuiltinID() == ID && "ID too large for field!"); } void clearBuiltinID() { @@ -384,19 +386,21 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID); if (Value > ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS && Value < ObjCKeywordOrInterestingOrBuiltin:: - NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS) + NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS) { + auto FirstInterestingIdentifier = + 1 + llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS); return static_cast<tok::InterestingIdentifierKind>( - ObjCOrBuiltinID - 1 - - llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS)); - else - return tok::not_interesting; + ObjCOrBuiltinID - FirstInterestingIdentifier); + } + return tok::not_interesting; } void setInterestingIdentifierID(unsigned ID) { assert(ID != tok::not_interesting); - ObjCOrBuiltinID = ID + 1 + - llvm::to_underlying( - ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS); + auto FirstInterestingIdentifier = + 1 + llvm::to_underlying( + ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS); + ObjCOrBuiltinID = ID + FirstInterestingIdentifier; assert(getInterestingIdentifierID() == ID && "ID too large for field!"); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 79a6d0fde06362..eea14a66fa1818 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -987,10 +987,13 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) { /// Whether the given identifier is "interesting". static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II, bool IsModule) { + bool IsInteresting = + II.getInterestingIdentifierID() != + tok::InterestingIdentifierKind::not_interesting || + II.getBuiltinID() != Builtin::ID::NotBuiltin || + II.getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword; return II.hadMacroDefinition() || II.isPoisoned() || - (!IsModule && II.getInterestingIdentifierID() != - tok::InterestingIdentifierKind::not_interesting) || - II.hasRevertedTokenIDToIdentifier() || + (!IsModule && IsInteresting) || II.hasRevertedTokenIDToIdentifier() || (!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) && II.getFETokenInfo()); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index d7f7dd40810cb9..7966b3175ec9f1 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3597,9 +3597,13 @@ class ASTIdentifierTableTrait { /// doesn't check whether the name has macros defined; use PublicMacroIterator /// to check that. bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) { - if (MacroOffset || II->isPoisoned() || - (!IsModule && II->getInterestingIdentifierID() != - tok::InterestingIdentifierKind::not_interesting) || + II->getObjCOrBuiltinID(); + bool IsInteresting = + II->getInterestingIdentifierID() != + tok::InterestingIdentifierKind::not_interesting || + II->getBuiltinID() != Builtin::ID::NotBuiltin || + II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword; + if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) || II->hasRevertedTokenIDToIdentifier() || (NeedDecls && II->getFETokenInfo())) return true; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits