https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/92511
>From e8f756ec7f8ea7e5bf18cc122a965fb2f258fd15 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Tue, 14 May 2024 15:33:12 +0800 Subject: [PATCH 1/5] [Serialization] No transitive identifier change --- .../clang/Lex/ExternalPreprocessorSource.h | 54 ++++++++- clang/include/clang/Lex/HeaderSearch.h | 12 +- .../include/clang/Serialization/ASTBitCodes.h | 2 +- clang/include/clang/Serialization/ASTReader.h | 19 ++- .../include/clang/Serialization/ModuleFile.h | 3 - clang/lib/Lex/HeaderSearch.cpp | 33 +++--- clang/lib/Serialization/ASTReader.cpp | 98 ++++++++-------- clang/lib/Serialization/ASTWriter.cpp | 63 ++++++---- clang/lib/Serialization/GlobalModuleIndex.cpp | 3 +- clang/lib/Serialization/ModuleFile.cpp | 1 - .../no-transitive-identifier-change.cppm | 110 ++++++++++++++++++ 11 files changed, 286 insertions(+), 112 deletions(-) create mode 100644 clang/test/Modules/no-transitive-identifier-change.cppm diff --git a/clang/include/clang/Lex/ExternalPreprocessorSource.h b/clang/include/clang/Lex/ExternalPreprocessorSource.h index 6775841860373..48429948dbffe 100644 --- a/clang/include/clang/Lex/ExternalPreprocessorSource.h +++ b/clang/include/clang/Lex/ExternalPreprocessorSource.h @@ -36,12 +36,64 @@ class ExternalPreprocessorSource { /// Return the identifier associated with the given ID number. /// /// The ID 0 is associated with the NULL identifier. - virtual IdentifierInfo *GetIdentifier(unsigned ID) = 0; + virtual IdentifierInfo *GetIdentifier(uint64_t ID) = 0; /// Map a module ID to a module. virtual Module *getModule(unsigned ModuleID) = 0; }; +// Either a pointer to an IdentifierInfo of the controlling macro or the ID +// number of the controlling macro. +class LazyIdentifierInfoPtr { + // If the low bit is clear, a pointer to the IdentifierInfo. If the low + // bit is set, the upper 63 bits are the ID number. + mutable uint64_t Ptr = 0; + +public: + LazyIdentifierInfoPtr() = default; + + explicit LazyIdentifierInfoPtr(const IdentifierInfo *Ptr) + : Ptr(reinterpret_cast<uint64_t>(Ptr)) {} + + explicit LazyIdentifierInfoPtr(uint64_t ID) : Ptr((ID << 1) | 0x01) { + assert((ID << 1 >> 1) == ID && "ID must require < 63 bits"); + if (ID == 0) + Ptr = 0; + } + + LazyIdentifierInfoPtr &operator=(const IdentifierInfo *Ptr) { + this->Ptr = reinterpret_cast<uint64_t>(Ptr); + return *this; + } + + LazyIdentifierInfoPtr &operator=(uint64_t ID) { + assert((ID << 1 >> 1) == ID && "IDs must require < 63 bits"); + if (ID == 0) + Ptr = 0; + else + Ptr = (ID << 1) | 0x01; + + return *this; + } + + /// Whether this pointer is non-NULL. + /// + /// This operation does not require the AST node to be deserialized. + bool isValid() const { return Ptr != 0; } + + /// Whether this pointer is currently stored as ID. + bool isID() const { return Ptr & 0x01; } + + IdentifierInfo *getPtr() const { + assert(!isID()); + return reinterpret_cast<IdentifierInfo *>(Ptr); + } + + uint64_t getID() const { + assert(isID()); + return Ptr >> 1; + } +}; } #endif diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h index 5ac63dddd4d4e..65700b8f9dc11 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -16,6 +16,7 @@ #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/DirectoryLookup.h" +#include "clang/Lex/ExternalPreprocessorSource.h" #include "clang/Lex/HeaderMap.h" #include "clang/Lex/ModuleMap.h" #include "llvm/ADT/ArrayRef.h" @@ -119,13 +120,6 @@ struct HeaderFileInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsValid : 1; - /// The ID number of the controlling macro. - /// - /// This ID number will be non-zero when there is a controlling - /// macro whose IdentifierInfo may not yet have been loaded from - /// external storage. - unsigned ControllingMacroID = 0; - /// If this file has a \#ifndef XXX (or equivalent) guard that /// protects the entire contents of the file, this is the identifier /// for the macro that controls whether or not it has any effect. @@ -134,7 +128,7 @@ struct HeaderFileInfo { /// the controlling macro of this header, since /// getControllingMacro() is able to load a controlling macro from /// external storage. - const IdentifierInfo *ControllingMacro = nullptr; + LazyIdentifierInfoPtr LazyControllingMacro; /// If this header came from a framework include, this is the name /// of the framework. @@ -580,7 +574,7 @@ class HeaderSearch { /// no-op \#includes. void SetFileControllingMacro(FileEntryRef File, const IdentifierInfo *ControllingMacro) { - getFileInfo(File).ControllingMacro = ControllingMacro; + getFileInfo(File).LazyControllingMacro = ControllingMacro; } /// Determine whether this file is intended to be safe from diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 902c4470650c5..03bac9be2bf3d 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -59,7 +59,7 @@ const unsigned VERSION_MINOR = 1; /// /// The ID numbers of identifiers are consecutive (in order of discovery) /// and start at 1. 0 is reserved for NULL. -using IdentifierID = uint32_t; +using IdentifierID = uint64_t; /// The number of predefined identifier IDs. const unsigned int NUM_PREDEF_IDENT_IDS = 1; diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 0a9006223dcbd..3f38a08f0da3a 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -660,14 +660,6 @@ class ASTReader /// been loaded. std::vector<IdentifierInfo *> IdentifiersLoaded; - using GlobalIdentifierMapType = - ContinuousRangeMap<serialization::IdentifierID, ModuleFile *, 4>; - - /// Mapping from global identifier IDs to the module in which the - /// identifier resides along with the offset that should be added to the - /// global identifier ID to produce a local ID. - GlobalIdentifierMapType GlobalIdentifierMap; - /// A vector containing macros that have already been /// loaded. /// @@ -1539,6 +1531,11 @@ class ASTReader /// Translate a \param GlobalDeclID to the index of DeclsLoaded array. unsigned translateGlobalDeclIDToIndex(GlobalDeclID ID) const; + /// Translate an \param IdentifierID ID to the index of IdentifiersLoaded + /// array and the corresponding module file. + std::pair<ModuleFile *, unsigned> + translateIdentifierIDToIndex(serialization::IdentifierID ID) const; + public: /// Load the AST file and validate its contents against the given /// Preprocessor. @@ -2123,7 +2120,7 @@ class ASTReader /// Load a selector from disk, registering its ID if it exists. void LoadSelector(Selector Sel); - void SetIdentifierInfo(unsigned ID, IdentifierInfo *II); + void SetIdentifierInfo(serialization::IdentifierID ID, IdentifierInfo *II); void SetGloballyVisibleDecls(IdentifierInfo *II, const SmallVectorImpl<GlobalDeclID> &DeclIDs, SmallVectorImpl<Decl *> *Decls = nullptr); @@ -2150,10 +2147,10 @@ class ASTReader return DecodeIdentifierInfo(ID); } - IdentifierInfo *getLocalIdentifier(ModuleFile &M, unsigned LocalID); + IdentifierInfo *getLocalIdentifier(ModuleFile &M, uint64_t LocalID); serialization::IdentifierID getGlobalIdentifierID(ModuleFile &M, - unsigned LocalID); + uint64_t LocalID); void resolvePendingMacro(IdentifierInfo *II, const PendingMacroInfo &PMInfo); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 56193d44dd6f3..3787f4eeb8a8b 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -310,9 +310,6 @@ class ModuleFile { /// Base identifier ID for identifiers local to this module. serialization::IdentifierID BaseIdentifierID = 0; - /// Remapping table for identifier IDs in this module. - ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap; - /// Actual data for the on-disk hash table of identifiers. /// /// This pointer points into a memory buffer, where the on-disk hash diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 574723b33866a..addcb6ce2b635 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -60,19 +60,21 @@ ALWAYS_ENABLED_STATISTIC(NumSubFrameworkLookups, const IdentifierInfo * HeaderFileInfo::getControllingMacro(ExternalPreprocessorSource *External) { - if (ControllingMacro) { - if (ControllingMacro->isOutOfDate()) { - assert(External && "We must have an external source if we have a " - "controlling macro that is out of date."); - External->updateOutOfDateIdentifier(*ControllingMacro); - } - return ControllingMacro; - } + if (LazyControllingMacro.isID()) { + if (!External) + return nullptr; - if (!ControllingMacroID || !External) - return nullptr; + LazyControllingMacro = + External->GetIdentifier(LazyControllingMacro.getID()); + return LazyControllingMacro.getPtr(); + } - ControllingMacro = External->GetIdentifier(ControllingMacroID); + IdentifierInfo *ControllingMacro = LazyControllingMacro.getPtr(); + if (ControllingMacro && ControllingMacro->isOutOfDate()) { + assert(External && "We must have an external source if we have a " + "controlling macro that is out of date."); + External->updateOutOfDateIdentifier(*ControllingMacro); + } return ControllingMacro; } @@ -1341,10 +1343,8 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI, mergeHeaderFileInfoModuleBits(HFI, OtherHFI.isModuleHeader, OtherHFI.isTextualModuleHeader); - if (!HFI.ControllingMacro && !HFI.ControllingMacroID) { - HFI.ControllingMacro = OtherHFI.ControllingMacro; - HFI.ControllingMacroID = OtherHFI.ControllingMacroID; - } + if (!HFI.LazyControllingMacro.isValid()) + HFI.LazyControllingMacro = OtherHFI.LazyControllingMacro; HFI.DirInfo = OtherHFI.DirInfo; HFI.External = (!HFI.IsValid || HFI.External); @@ -1419,8 +1419,7 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const { // once. Note that we dor't check for #import, because that's not a property // of the file itself. if (auto *HFI = getExistingFileInfo(File)) - return HFI->isPragmaOnce || HFI->ControllingMacro || - HFI->ControllingMacroID; + return HFI->isPragmaOnce || HFI->LazyControllingMacro.isValid(); return false; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index a5a747f68f014..29a7fcb2cb507 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -920,7 +920,7 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { SelectorTable &SelTable = Reader.getContext().Selectors; unsigned N = endian::readNext<uint16_t, llvm::endianness::little>(d); const IdentifierInfo *FirstII = Reader.getLocalIdentifier( - F, endian::readNext<uint32_t, llvm::endianness::little>(d)); + F, endian::readNext<IdentifierID, llvm::endianness::little>(d)); if (N == 0) return SelTable.getNullarySelector(FirstII); else if (N == 1) @@ -930,7 +930,7 @@ ASTSelectorLookupTrait::ReadKey(const unsigned char* d, unsigned) { Args.push_back(FirstII); for (unsigned I = 1; I != N; ++I) Args.push_back(Reader.getLocalIdentifier( - F, endian::readNext<uint32_t, llvm::endianness::little>(d))); + F, endian::readNext<IdentifierID, llvm::endianness::little>(d))); return SelTable.getSelector(N, Args.data()); } @@ -1011,7 +1011,8 @@ static bool readBit(unsigned &Bits) { IdentifierID ASTIdentifierLookupTrait::ReadIdentifierID(const unsigned char *d) { using namespace llvm::support; - unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d); + IdentifierID RawID = + endian::readNext<IdentifierID, llvm::endianness::little>(d); return Reader.getGlobalIdentifierID(F, RawID >> 1); } @@ -1029,9 +1030,12 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, unsigned DataLen) { using namespace llvm::support; - unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d); + IdentifierID RawID = + endian::readNext<IdentifierID, llvm::endianness::little>(d); bool IsInteresting = RawID & 0x01; + DataLen -= sizeof(IdentifierID); + // Wipe out the "is interesting" bit. RawID = RawID >> 1; @@ -1062,7 +1066,7 @@ IdentifierInfo *ASTIdentifierLookupTrait::ReadData(const internal_key_type& k, bool HadMacroDefinition = readBit(Bits); assert(Bits == 0 && "Extra bits in the identifier?"); - DataLen -= 8; + DataLen -= sizeof(uint16_t) * 2; // Set or check the various bits in the IdentifierInfo structure. // Token IDs are read-only. @@ -1188,7 +1192,7 @@ ASTDeclContextNameLookupTrait::ReadKey(const unsigned char *d, unsigned) { case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXDeductionGuideName: Data = (uint64_t)Reader.getLocalIdentifier( - F, endian::readNext<uint32_t, llvm::endianness::little>(d)); + F, endian::readNext<IdentifierID, llvm::endianness::little>(d)); break; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: @@ -2054,8 +2058,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, HFI.isPragmaOnce |= (Flags >> 4) & 0x01; HFI.DirInfo = (Flags >> 1) & 0x07; HFI.IndexHeaderMapHeader = Flags & 0x01; - HFI.ControllingMacroID = Reader.getGlobalIdentifierID( - M, endian::readNext<uint32_t, llvm::endianness::little>(d)); + HFI.LazyControllingMacro = Reader.getGlobalIdentifierID( + M, endian::readNext<IdentifierID, llvm::endianness::little>(d)); if (unsigned FrameworkOffset = endian::readNext<uint32_t, llvm::endianness::little>(d)) { // The framework offset is 1 greater than the actual offset, @@ -3429,24 +3433,11 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, "duplicate IDENTIFIER_OFFSET record in AST file"); F.IdentifierOffsets = (const uint32_t *)Blob.data(); F.LocalNumIdentifiers = Record[0]; - unsigned LocalBaseIdentifierID = Record[1]; F.BaseIdentifierID = getTotalNumIdentifiers(); - if (F.LocalNumIdentifiers > 0) { - // Introduce the global -> local mapping for identifiers within this - // module. - GlobalIdentifierMap.insert(std::make_pair(getTotalNumIdentifiers() + 1, - &F)); - - // Introduce the local -> global mapping for identifiers within this - // module. - F.IdentifierRemap.insertOrReplace( - std::make_pair(LocalBaseIdentifierID, - F.BaseIdentifierID - LocalBaseIdentifierID)); - + if (F.LocalNumIdentifiers > 0) IdentifiersLoaded.resize(IdentifiersLoaded.size() + F.LocalNumIdentifiers); - } break; } @@ -4038,7 +4029,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { F.ModuleOffsetMap = StringRef(); using RemapBuilder = ContinuousRangeMap<uint32_t, int, 2>::Builder; - RemapBuilder IdentifierRemap(F.IdentifierRemap); RemapBuilder MacroRemap(F.MacroRemap); RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); RemapBuilder SubmoduleRemap(F.SubmoduleRemap); @@ -4071,8 +4061,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { ImportedModuleVector.push_back(OM); - uint32_t IdentifierIDOffset = - endian::readNext<uint32_t, llvm::endianness::little>(Data); uint32_t MacroIDOffset = endian::readNext<uint32_t, llvm::endianness::little>(Data); uint32_t PreprocessedEntityIDOffset = @@ -4092,7 +4080,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { static_cast<int>(BaseOffset - Offset))); }; - mapOffset(IdentifierIDOffset, OM->BaseIdentifierID, IdentifierRemap); mapOffset(MacroIDOffset, OM->BaseMacroID, MacroRemap); mapOffset(PreprocessedEntityIDOffset, OM->BasePreprocessedEntityID, PreprocessedEntityRemap); @@ -8181,7 +8168,6 @@ LLVM_DUMP_METHOD void ASTReader::dump() { dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap); dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap); dumpModuleIDMap("Global type map", GlobalTypeMap); - dumpModuleIDMap("Global identifier map", GlobalIdentifierMap); dumpModuleIDMap("Global macro map", GlobalMacroMap); dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap); dumpModuleIDMap("Global selector map", GlobalSelectorMap); @@ -8822,8 +8808,9 @@ void ASTReader::LoadSelector(Selector Sel) { void ASTReader::SetIdentifierInfo(IdentifierID ID, IdentifierInfo *II) { assert(ID && "Non-zero identifier ID required"); - assert(ID <= IdentifiersLoaded.size() && "identifier ID out of range"); - IdentifiersLoaded[ID - 1] = II; + unsigned Index = translateIdentifierIDToIndex(ID).second; + assert(Index < IdentifiersLoaded.size() && "identifier ID out of range"); + IdentifiersLoaded[Index] = II; if (DeserializationListener) DeserializationListener->IdentifierRead(ID, II); } @@ -8876,6 +8863,22 @@ void ASTReader::SetGloballyVisibleDecls( } } +std::pair<ModuleFile *, unsigned> +ASTReader::translateIdentifierIDToIndex(IdentifierID ID) const { + if (ID == 0) + return {nullptr, 0}; + + unsigned ModuleFileIndex = ID >> 32; + unsigned LocalID = ID & llvm::maskTrailingOnes<IdentifierID>(32); + + assert(ModuleFileIndex && "not translating loaded IdentifierID?"); + assert(getModuleManager().size() > ModuleFileIndex - 1); + + ModuleFile &MF = getModuleManager()[ModuleFileIndex - 1]; + assert(LocalID < MF.LocalNumIdentifiers); + return {&MF, MF.BaseIdentifierID + LocalID}; +} + IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) { if (ID == 0) return nullptr; @@ -8885,45 +8888,48 @@ IdentifierInfo *ASTReader::DecodeIdentifierInfo(IdentifierID ID) { return nullptr; } - ID -= 1; - if (!IdentifiersLoaded[ID]) { - GlobalIdentifierMapType::iterator I = GlobalIdentifierMap.find(ID + 1); - assert(I != GlobalIdentifierMap.end() && "Corrupted global identifier map"); - ModuleFile *M = I->second; - unsigned Index = ID - M->BaseIdentifierID; + auto [M, Index] = translateIdentifierIDToIndex(ID); + if (!IdentifiersLoaded[Index]) { + assert(M != nullptr && "Untranslated Identifier ID?"); + assert(Index >= M->BaseIdentifierID); + unsigned LocalIndex = Index - M->BaseIdentifierID; const unsigned char *Data = - M->IdentifierTableData + M->IdentifierOffsets[Index]; + M->IdentifierTableData + M->IdentifierOffsets[LocalIndex]; ASTIdentifierLookupTrait Trait(*this, *M); auto KeyDataLen = Trait.ReadKeyDataLength(Data); auto Key = Trait.ReadKey(Data, KeyDataLen.first); auto &II = PP.getIdentifierTable().get(Key); - IdentifiersLoaded[ID] = &II; + IdentifiersLoaded[Index] = &II; markIdentifierFromAST(*this, II); if (DeserializationListener) - DeserializationListener->IdentifierRead(ID + 1, &II); + DeserializationListener->IdentifierRead(ID, &II); } - return IdentifiersLoaded[ID]; + return IdentifiersLoaded[Index]; } -IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, unsigned LocalID) { +IdentifierInfo *ASTReader::getLocalIdentifier(ModuleFile &M, uint64_t LocalID) { return DecodeIdentifierInfo(getGlobalIdentifierID(M, LocalID)); } -IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, unsigned LocalID) { +IdentifierID ASTReader::getGlobalIdentifierID(ModuleFile &M, uint64_t LocalID) { if (LocalID < NUM_PREDEF_IDENT_IDS) return LocalID; if (!M.ModuleOffsetMap.empty()) ReadModuleOffsetMap(M); - ContinuousRangeMap<uint32_t, int, 2>::iterator I - = M.IdentifierRemap.find(LocalID - NUM_PREDEF_IDENT_IDS); - assert(I != M.IdentifierRemap.end() - && "Invalid index into identifier index remap"); + unsigned ModuleFileIndex = LocalID >> 32; + LocalID &= llvm::maskTrailingOnes<IdentifierID>(32); + ModuleFile *MF = + ModuleFileIndex ? M.TransitiveImports[ModuleFileIndex - 1] : &M; + assert(MF && "malformed identifier ID encoding?"); - return LocalID + I->second; + if (!ModuleFileIndex) + LocalID -= NUM_PREDEF_IDENT_IDS; + + return ((IdentifierID)(MF->Index + 1) << 32) | LocalID; } MacroInfo *ASTReader::getMacro(MacroID ID) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index b8b613db712f4..dd6c135d54b46 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1991,7 +1991,7 @@ namespace { std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) { unsigned KeyLen = key.Filename.size() + 1 + 8 + 8; - unsigned DataLen = 1 + 4 + 4; + unsigned DataLen = 1 + sizeof(IdentifierID) + 4; for (auto ModInfo : Data.KnownHeaders) if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule())) DataLen += 4; @@ -2026,10 +2026,11 @@ namespace { | Data.HFI.IndexHeaderMapHeader; LE.write<uint8_t>(Flags); - if (!Data.HFI.ControllingMacro) - LE.write<uint32_t>(Data.HFI.ControllingMacroID); + if (Data.HFI.LazyControllingMacro.isID()) + LE.write<IdentifierID>(Data.HFI.LazyControllingMacro.getID()); else - LE.write<uint32_t>(Writer.getIdentifierRef(Data.HFI.ControllingMacro)); + LE.write<IdentifierID>( + Writer.getIdentifierRef(Data.HFI.LazyControllingMacro.getPtr())); unsigned Offset = 0; if (!Data.HFI.Framework.empty()) { @@ -3452,7 +3453,9 @@ class ASTMethodPoolTrait { std::pair<unsigned, unsigned> EmitKeyDataLength(raw_ostream& Out, Selector Sel, data_type_ref Methods) { - unsigned KeyLen = 2 + (Sel.getNumArgs()? Sel.getNumArgs() * 4 : 4); + unsigned KeyLen = + 2 + (Sel.getNumArgs() ? Sel.getNumArgs() * sizeof(IdentifierID) + : sizeof(IdentifierID)); unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) @@ -3477,7 +3480,7 @@ class ASTMethodPoolTrait { if (N == 0) N = 1; for (unsigned I = 0; I != N; ++I) - LE.write<uint32_t>( + LE.write<IdentifierID>( Writer.getIdentifierRef(Sel.getIdentifierInfoForSlot(I))); } @@ -3788,7 +3791,7 @@ class ASTIdentifierTableTrait { InterestingIdentifierOffsets->push_back(Out.tell()); unsigned KeyLen = II->getLength() + 1; - unsigned DataLen = 4; // 4 bytes for the persistent ID << 1 + unsigned DataLen = sizeof(IdentifierID); // bytes for the persistent ID << 1 if (isInterestingIdentifier(II, MacroOffset)) { DataLen += 2; // 2 bytes for builtin ID DataLen += 2; // 2 bytes for flags @@ -3814,11 +3817,11 @@ class ASTIdentifierTableTrait { auto MacroOffset = Writer.getMacroDirectivesOffset(II); if (!isInterestingIdentifier(II, MacroOffset)) { - LE.write<uint32_t>(ID << 1); + LE.write<IdentifierID>(ID << 1); return; } - LE.write<uint32_t>((ID << 1) | 0x01); + LE.write<IdentifierID>((ID << 1) | 0x01); uint32_t Bits = (uint32_t)II->getObjCOrBuiltinID(); assert((Bits & 0xffff) == Bits && "ObjCOrBuiltinID too big for ASTReader."); LE.write<uint16_t>(Bits); @@ -3851,6 +3854,10 @@ class ASTIdentifierTableTrait { } // namespace +/// If the \param IdentifierID ID is a local Identifier ID. If the higher +/// bits of ID is 0, it implies that the ID doesn't come from AST files. +static bool isLocalIdentifierID(IdentifierID ID) { return !(ID >> 32); } + /// Write the identifier table into the AST file. /// /// The identifier table consists of a blob containing string data @@ -3895,7 +3902,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, // Write out identifiers if either the ID is local or the identifier has // changed since it was loaded. - if (ID >= FirstIdentID || II->hasChangedSinceDeserialization() || + if (isLocalIdentifierID(ID) || II->hasChangedSinceDeserialization() || (Trait.needDecls() && II->hasFETokenInfoChangedSinceDeserialization())) Generator.insert(II, ID, Trait); @@ -3929,7 +3936,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, auto Abbrev = std::make_shared<BitCodeAbbrev>(); Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of identifiers - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned IdentifierOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); @@ -3939,8 +3945,7 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, #endif RecordData::value_type Record[] = {IDENTIFIER_OFFSET, - IdentifierOffsets.size(), - FirstIdentID - NUM_PREDEF_IDENT_IDS}; + IdentifierOffsets.size()}; Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record, bytes(IdentifierOffsets)); @@ -4030,11 +4035,13 @@ class ASTDeclContextNameLookupTrait { unsigned KeyLen = 1; switch (Name.getKind()) { case DeclarationName::Identifier: + case DeclarationName::CXXLiteralOperatorName: + case DeclarationName::CXXDeductionGuideName: + KeyLen += sizeof(IdentifierID); + break; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: case DeclarationName::ObjCMultiArgSelector: - case DeclarationName::CXXLiteralOperatorName: - case DeclarationName::CXXDeductionGuideName: KeyLen += 4; break; case DeclarationName::CXXOperatorName: @@ -4062,7 +4069,7 @@ class ASTDeclContextNameLookupTrait { case DeclarationName::Identifier: case DeclarationName::CXXLiteralOperatorName: case DeclarationName::CXXDeductionGuideName: - LE.write<uint32_t>(Writer.getIdentifierRef(Name.getIdentifier())); + LE.write<IdentifierID>(Writer.getIdentifierRef(Name.getIdentifier())); return; case DeclarationName::ObjCZeroArgSelector: case DeclarationName::ObjCOneArgSelector: @@ -4780,8 +4787,15 @@ void ASTWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) { IdentifierID ID = IdentifierIDs[II]; // Only store offsets new to this AST file. Other identifier names are looked // up earlier in the chain and thus don't need an offset. - if (ID >= FirstIdentID) - IdentifierOffsets[ID - FirstIdentID] = Offset; + if (!isLocalIdentifierID(ID)) + return; + + // For local identifiers, the module file index must be 0. + + assert(ID != 0); + ID -= NUM_PREDEF_IDENT_IDS; + assert(ID < IdentifierOffsets.size()); + IdentifierOffsets[ID] = Offset; } /// Note that the selector Sel occurs at the given offset @@ -5415,7 +5429,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, // These values should be unique within a chain, since they will be read // as keys into ContinuousRangeMaps. - writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers); writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros); writeBaseIDOrNone(M.BasePreprocessedEntityID, M.NumPreprocessedEntities); @@ -6614,20 +6627,26 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) { // Note, this will get called multiple times, once one the reader starts up // and again each time it's done reading a PCH or module. FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes(); - FirstIdentID = NUM_PREDEF_IDENT_IDS + Chain->getTotalNumIdentifiers(); FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros(); FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules(); FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors(); NextTypeID = FirstTypeID; - NextIdentID = FirstIdentID; NextMacroID = FirstMacroID; NextSelectorID = FirstSelectorID; NextSubmoduleID = FirstSubmoduleID; } void ASTWriter::IdentifierRead(IdentifierID ID, IdentifierInfo *II) { - // Always keep the highest ID. See \p TypeRead() for more information. IdentifierID &StoredID = IdentifierIDs[II]; + unsigned OriginalModuleFileIndex = StoredID >> 32; + + // Always keep the local identifier ID. See \p TypeRead() for more + // information. + if (OriginalModuleFileIndex == 0 && StoredID) + return; + + // Otherwise, keep the highest ID since the module file comes later has + // higher module file indexes. if (ID > StoredID) StoredID = ID; } diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp index f09ceb8d31620..1163943c5dffa 100644 --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -510,7 +510,8 @@ namespace { // The first bit indicates whether this identifier is interesting. // That's all we care about. using namespace llvm::support; - unsigned RawID = endian::readNext<uint32_t, llvm::endianness::little>(d); + IdentifierID RawID = + endian::readNext<IdentifierID, llvm::endianness::little>(d); bool IsInteresting = RawID & 0x01; return std::make_pair(k, IsInteresting); } diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp index f64a59bd94891..7976c28b28671 100644 --- a/clang/lib/Serialization/ModuleFile.cpp +++ b/clang/lib/Serialization/ModuleFile.cpp @@ -62,7 +62,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { llvm::errs() << " Base identifier ID: " << BaseIdentifierID << '\n' << " Number of identifiers: " << LocalNumIdentifiers << '\n'; - dumpLocalRemap("Identifier ID local -> global map", IdentifierRemap); llvm::errs() << " Base macro ID: " << BaseMacroID << '\n' << " Number of macros: " << LocalNumMacros << '\n'; diff --git a/clang/test/Modules/no-transitive-identifier-change.cppm b/clang/test/Modules/no-transitive-identifier-change.cppm new file mode 100644 index 0000000000000..97e8ac4444fdd --- /dev/null +++ b/clang/test/Modules/no-transitive-identifier-change.cppm @@ -0,0 +1,110 @@ +// Testing that adding an new identifier in an unused module file won't change +// the BMI of the current module file. +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/m-partA.cppm -emit-reduced-module-interface -o %t/m-partA.pcm +// RUN: %clang_cc1 -std=c++20 %t/m-partA.v1.cppm -emit-reduced-module-interface -o \ +// RUN: %t/m-partA.v1.pcm +// RUN: %clang_cc1 -std=c++20 %t/m-partB.cppm -emit-reduced-module-interface -o %t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ +// RUN: -fmodule-file=m:partA=%t/m-partA.pcm -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.v1.pcm \ +// RUN: -fmodule-file=m:partA=%t/m-partA.v1.pcm -fmodule-file=m:partB=%t/m-partB.pcm +// +// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.pcm \ +// RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.v1.pcm \ +// RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// Since useBOnly only uses partB from module M, the change in partA shouldn't affect +// useBOnly. +// RUN: diff %t/useBOnly.pcm %t/useBOnly.v1.pcm &> /dev/null +// +// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.pcm \ +// RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.v1.pcm \ +// RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// useAOnly should differ +// RUN: not diff %t/useAOnly.pcm %t/useAOnly.v1.pcm &> /dev/null + +//--- m-partA.cppm +export module m:partA; + +export inline int getA() { + return 43; +} + +export class A { +public: + int getMem(); +}; + +export template <typename T> +class ATempl { +public: + T getT(); +}; + +//--- m-partA.v1.cppm +export module m:partA; + +export inline int getA() { + return 43; +} + +// Now we add a new declaration without introducing a new type. +// The consuming module which didn't use m:partA completely is expected to be +// not changed. +export inline int getA2() { + return 88; +} + +export class A { +public: + int getMem(); + // Now we add a new declaration without introducing a new type. + // The consuming module which didn't use m:partA completely is expected to be + // not changed. + int getMem2(); +}; + +export template <typename T> +class ATempl { +public: + T getT(); + // Add a new declaration without introducing a new type. + T getT2(); +}; + +//--- m-partB.cppm +export module m:partB; + +export inline int getB() { + return 430; +} + +//--- m.cppm +export module m; +export import :partA; +export import :partB; + +//--- useBOnly.cppm +export module useBOnly; +import m; + +export inline int get() { + return getB(); +} + +//--- useAOnly.cppm +export module useAOnly; +import m; + +export inline int get() { + return getA(); +} >From 57cfb2b7791666022ee46201b5126ac610c19bdd Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Fri, 17 May 2024 14:25:53 +0800 Subject: [PATCH 2/5] [serialization] No transitive type change --- .../include/clang/Serialization/ASTBitCodes.h | 32 ++++-- clang/include/clang/Serialization/ASTReader.h | 23 ++-- .../clang/Serialization/ASTRecordReader.h | 2 +- .../include/clang/Serialization/ModuleFile.h | 3 - clang/lib/Serialization/ASTReader.cpp | 104 +++++++++--------- clang/lib/Serialization/ASTWriter.cpp | 31 +++--- clang/lib/Serialization/ModuleFile.cpp | 1 - .../Modules/no-transitive-decls-change.cppm | 12 +- .../no-transitive-identifier-change.cppm | 3 - .../Modules/no-transitive-type-change.cppm | 68 ++++++++++++ clang/test/Modules/pr59999.cppm | 36 +++--- 11 files changed, 196 insertions(+), 119 deletions(-) create mode 100644 clang/test/Modules/no-transitive-type-change.cppm diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 03bac9be2bf3d..13ee1e4724f1e 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -26,6 +26,7 @@ #include "clang/Serialization/SourceLocationEncoding.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Bitstream/BitCodes.h" +#include "llvm/Support/MathExtras.h" #include <cassert> #include <cstdint> @@ -70,38 +71,53 @@ using DeclID = DeclIDBase::DeclID; /// An ID number that refers to a type in an AST file. /// -/// The ID of a type is partitioned into two parts: the lower +/// The ID of a type is partitioned into three parts: +/// - the lower /// three bits are used to store the const/volatile/restrict -/// qualifiers (as with QualType) and the upper bits provide a -/// type index. The type index values are partitioned into two +/// qualifiers (as with QualType). +/// - the upper 29 bits provide a type index in the corresponding +/// module file. +/// - the upper 32 bits provide a module file index. +/// +/// The type index values are partitioned into two /// sets. The values below NUM_PREDEF_TYPE_IDs are predefined type /// IDs (based on the PREDEF_TYPE_*_ID constants), with 0 as a /// placeholder for "no type". Values from NUM_PREDEF_TYPE_IDs are /// other types that have serialized representations. -using TypeID = uint32_t; +using TypeID = uint64_t; /// A type index; the type ID with the qualifier bits removed. +/// Keep structure alignment 32-bit since the blob is assumed as 32-bit +/// aligned. class TypeIdx { + uint32_t ModuleFileIndex = 0; uint32_t Idx = 0; public: TypeIdx() = default; - explicit TypeIdx(uint32_t index) : Idx(index) {} + explicit TypeIdx(uint32_t Idx) : ModuleFileIndex(0), Idx(Idx) {} + + explicit TypeIdx(uint32_t ModuleFileIdx, uint32_t Idx) + : ModuleFileIndex(ModuleFileIdx), Idx(Idx) {} + + uint32_t getModuleFileIndex() const { return ModuleFileIndex; } - uint32_t getIndex() const { return Idx; } + uint64_t getValue() const { return ((uint64_t)ModuleFileIndex << 32) | Idx; } TypeID asTypeID(unsigned FastQuals) const { if (Idx == uint32_t(-1)) return TypeID(-1); - return (Idx << Qualifiers::FastWidth) | FastQuals; + unsigned Index = (Idx << Qualifiers::FastWidth) | FastQuals; + return ((uint64_t)ModuleFileIndex << 32) | Index; } static TypeIdx fromTypeID(TypeID ID) { if (ID == TypeID(-1)) return TypeIdx(-1); - return TypeIdx(ID >> Qualifiers::FastWidth); + return TypeIdx(ID >> 32, (ID & llvm::maskTrailingOnes<TypeID>(32)) >> + Qualifiers::FastWidth); } }; diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 3f38a08f0da3a..f4180984eaad3 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -490,14 +490,6 @@ class ASTReader /// ID = (I + 1) << FastQual::Width has already been loaded llvm::PagedVector<QualType> TypesLoaded; - using GlobalTypeMapType = - ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>; - - /// Mapping from global type IDs to the module in which the - /// type resides along with the offset that should be added to the - /// global type ID to produce a local ID. - GlobalTypeMapType GlobalTypeMap; - /// Declarations that have already been loaded from the chain. /// /// When the pointer at index I is non-NULL, the declaration with ID @@ -1423,8 +1415,8 @@ class ASTReader RecordLocation(ModuleFile *M, uint64_t O) : F(M), Offset(O) {} }; - QualType readTypeRecord(unsigned Index); - RecordLocation TypeCursorForIndex(unsigned Index); + QualType readTypeRecord(serialization::TypeID ID); + RecordLocation TypeCursorForIndex(serialization::TypeID ID); void LoadedDecl(unsigned Index, Decl *D); Decl *ReadDeclRecord(GlobalDeclID ID); void markIncompleteDeclChain(Decl *D); @@ -1536,6 +1528,11 @@ class ASTReader std::pair<ModuleFile *, unsigned> translateIdentifierIDToIndex(serialization::IdentifierID ID) const; + /// Translate an \param TypeID ID to the index of TypesLoaded + /// array and the corresponding module file. + std::pair<ModuleFile *, unsigned> + translateTypeIDToIndex(serialization::TypeID ID) const; + public: /// Load the AST file and validate its contents against the given /// Preprocessor. @@ -1884,10 +1881,11 @@ class ASTReader QualType GetType(serialization::TypeID ID); /// Resolve a local type ID within a given AST file into a type. - QualType getLocalType(ModuleFile &F, unsigned LocalID); + QualType getLocalType(ModuleFile &F, serialization::TypeID LocalID); /// Map a local type ID within a given AST file into a global type ID. - serialization::TypeID getGlobalTypeID(ModuleFile &F, unsigned LocalID) const; + serialization::TypeID getGlobalTypeID(ModuleFile &F, + serialization::TypeID LocalID) const; /// Read a type from the current position in the given record, which /// was read from the given AST file. @@ -1909,6 +1907,7 @@ class ASTReader /// if the declaration is not from a module file. ModuleFile *getOwningModuleFile(const Decl *D) const; ModuleFile *getOwningModuleFile(GlobalDeclID ID) const; + ModuleFile *getOwningModuleFile(serialization::TypeID ID) const; /// Returns the source location for the decl \p ID. SourceLocation getSourceLocationForDeclID(GlobalDeclID ID); diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h index d00fb182f05f4..2561418b78ca7 100644 --- a/clang/include/clang/Serialization/ASTRecordReader.h +++ b/clang/include/clang/Serialization/ASTRecordReader.h @@ -163,7 +163,7 @@ class ASTRecordReader void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr); /// Map a local type ID within a given AST file to a global type ID. - serialization::TypeID getGlobalTypeID(unsigned LocalID) const { + serialization::TypeID getGlobalTypeID(serialization::TypeID LocalID) const { return Reader->getGlobalTypeID(*F, LocalID); } diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 3787f4eeb8a8b..3e920c0f68360 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -482,9 +482,6 @@ class ModuleFile { /// the global type ID space. serialization::TypeID BaseTypeIndex = 0; - /// Remapping table for type IDs in this module. - ContinuousRangeMap<uint32_t, int, 2> TypeRemap; - // === Miscellaneous === /// Diagnostic IDs and their mappings that the user changed. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 29a7fcb2cb507..9d05037b89392 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3357,20 +3357,11 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, "duplicate TYPE_OFFSET record in AST file"); F.TypeOffsets = reinterpret_cast<const UnalignedUInt64 *>(Blob.data()); F.LocalNumTypes = Record[0]; - unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); - if (F.LocalNumTypes > 0) { - // Introduce the global -> local mapping for types within this module. - GlobalTypeMap.insert(std::make_pair(getTotalNumTypes(), &F)); - - // Introduce the local -> global mapping for types within this module. - F.TypeRemap.insertOrReplace( - std::make_pair(LocalBaseTypeIndex, - F.BaseTypeIndex - LocalBaseTypeIndex)); - + if (F.LocalNumTypes > 0) TypesLoaded.resize(TypesLoaded.size() + F.LocalNumTypes); - } + break; } @@ -4033,7 +4024,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { RemapBuilder PreprocessedEntityRemap(F.PreprocessedEntityRemap); RemapBuilder SubmoduleRemap(F.SubmoduleRemap); RemapBuilder SelectorRemap(F.SelectorRemap); - RemapBuilder TypeRemap(F.TypeRemap); auto &ImportedModuleVector = F.TransitiveImports; assert(ImportedModuleVector.empty()); @@ -4069,8 +4059,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { endian::readNext<uint32_t, llvm::endianness::little>(Data); uint32_t SelectorIDOffset = endian::readNext<uint32_t, llvm::endianness::little>(Data); - uint32_t TypeIndexOffset = - endian::readNext<uint32_t, llvm::endianness::little>(Data); auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset, RemapBuilder &Remap) { @@ -4085,7 +4073,6 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const { PreprocessedEntityRemap); mapOffset(SubmoduleIDOffset, OM->BaseSubmoduleID, SubmoduleRemap); mapOffset(SelectorIDOffset, OM->BaseSelectorID, SelectorRemap); - mapOffset(TypeIndexOffset, OM->BaseTypeIndex, TypeRemap); } } @@ -5064,12 +5051,12 @@ void ASTReader::InitializeContext() { // Load the special types. if (SpecialTypes.size() >= NumSpecialTypeIDs) { - if (unsigned String = SpecialTypes[SPECIAL_TYPE_CF_CONSTANT_STRING]) { + if (TypeID String = SpecialTypes[SPECIAL_TYPE_CF_CONSTANT_STRING]) { if (!Context.CFConstantStringTypeDecl) Context.setCFConstantStringType(GetType(String)); } - if (unsigned File = SpecialTypes[SPECIAL_TYPE_FILE]) { + if (TypeID File = SpecialTypes[SPECIAL_TYPE_FILE]) { QualType FileType = GetType(File); if (FileType.isNull()) { Error("FILE type is NULL"); @@ -5090,7 +5077,7 @@ void ASTReader::InitializeContext() { } } - if (unsigned Jmp_buf = SpecialTypes[SPECIAL_TYPE_JMP_BUF]) { + if (TypeID Jmp_buf = SpecialTypes[SPECIAL_TYPE_JMP_BUF]) { QualType Jmp_bufType = GetType(Jmp_buf); if (Jmp_bufType.isNull()) { Error("jmp_buf type is NULL"); @@ -5111,7 +5098,7 @@ void ASTReader::InitializeContext() { } } - if (unsigned Sigjmp_buf = SpecialTypes[SPECIAL_TYPE_SIGJMP_BUF]) { + if (TypeID Sigjmp_buf = SpecialTypes[SPECIAL_TYPE_SIGJMP_BUF]) { QualType Sigjmp_bufType = GetType(Sigjmp_buf); if (Sigjmp_bufType.isNull()) { Error("sigjmp_buf type is NULL"); @@ -5129,25 +5116,24 @@ void ASTReader::InitializeContext() { } } - if (unsigned ObjCIdRedef - = SpecialTypes[SPECIAL_TYPE_OBJC_ID_REDEFINITION]) { + if (TypeID ObjCIdRedef = SpecialTypes[SPECIAL_TYPE_OBJC_ID_REDEFINITION]) { if (Context.ObjCIdRedefinitionType.isNull()) Context.ObjCIdRedefinitionType = GetType(ObjCIdRedef); } - if (unsigned ObjCClassRedef - = SpecialTypes[SPECIAL_TYPE_OBJC_CLASS_REDEFINITION]) { + if (TypeID ObjCClassRedef = + SpecialTypes[SPECIAL_TYPE_OBJC_CLASS_REDEFINITION]) { if (Context.ObjCClassRedefinitionType.isNull()) Context.ObjCClassRedefinitionType = GetType(ObjCClassRedef); } - if (unsigned ObjCSelRedef - = SpecialTypes[SPECIAL_TYPE_OBJC_SEL_REDEFINITION]) { + if (TypeID ObjCSelRedef = + SpecialTypes[SPECIAL_TYPE_OBJC_SEL_REDEFINITION]) { if (Context.ObjCSelRedefinitionType.isNull()) Context.ObjCSelRedefinitionType = GetType(ObjCSelRedef); } - if (unsigned Ucontext_t = SpecialTypes[SPECIAL_TYPE_UCONTEXT_T]) { + if (TypeID Ucontext_t = SpecialTypes[SPECIAL_TYPE_UCONTEXT_T]) { QualType Ucontext_tType = GetType(Ucontext_t); if (Ucontext_tType.isNull()) { Error("ucontext_t type is NULL"); @@ -6632,10 +6618,8 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { } /// Get the correct cursor and offset for loading a type. -ASTReader::RecordLocation ASTReader::TypeCursorForIndex(unsigned Index) { - GlobalTypeMapType::iterator I = GlobalTypeMap.find(Index); - assert(I != GlobalTypeMap.end() && "Corrupted global type map"); - ModuleFile *M = I->second; +ASTReader::RecordLocation ASTReader::TypeCursorForIndex(TypeID ID) { + auto [M, Index] = translateTypeIDToIndex(ID); return RecordLocation(M, M->TypeOffsets[Index - M->BaseTypeIndex].get() + M->DeclsBlockStartOffset); } @@ -6656,10 +6640,10 @@ static std::optional<Type::TypeClass> getTypeClassForCode(TypeCode code) { /// routine actually reads the record corresponding to the type at the given /// location. It is a helper routine for GetType, which deals with reading type /// IDs. -QualType ASTReader::readTypeRecord(unsigned Index) { +QualType ASTReader::readTypeRecord(TypeID ID) { assert(ContextObj && "reading type with no AST context"); ASTContext &Context = *ContextObj; - RecordLocation Loc = TypeCursorForIndex(Index); + RecordLocation Loc = TypeCursorForIndex(ID); BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor; // Keep track of where we are in the stream, then jump back there @@ -7100,14 +7084,25 @@ TypeSourceInfo *ASTRecordReader::readTypeSourceInfo() { return TInfo; } +std::pair<ModuleFile *, unsigned> +ASTReader::translateTypeIDToIndex(serialization::TypeID ID) const { + unsigned Index = + (ID & llvm::maskTrailingOnes<TypeID>(32)) >> Qualifiers::FastWidth; + + ModuleFile *OwningModuleFile = getOwningModuleFile(ID); + assert(OwningModuleFile && + "untranslated type ID or local type ID shouldn't be in TypesLoaded"); + return {OwningModuleFile, OwningModuleFile->BaseTypeIndex + Index}; +} + QualType ASTReader::GetType(TypeID ID) { assert(ContextObj && "reading type with no AST context"); ASTContext &Context = *ContextObj; unsigned FastQuals = ID & Qualifiers::FastMask; - unsigned Index = ID >> Qualifiers::FastWidth; - if (Index < NUM_PREDEF_TYPE_IDS) { + if (uint64_t Index = ID >> Qualifiers::FastWidth; + Index < NUM_PREDEF_TYPE_IDS) { QualType T; switch ((PredefinedTypeIDs)Index) { case PREDEF_TYPE_LAST_ID: @@ -7376,10 +7371,11 @@ QualType ASTReader::GetType(TypeID ID) { return T.withFastQualifiers(FastQuals); } - Index -= NUM_PREDEF_TYPE_IDS; + unsigned Index = translateTypeIDToIndex(ID).second; + assert(Index < TypesLoaded.size() && "Type index out-of-range"); if (TypesLoaded[Index].isNull()) { - TypesLoaded[Index] = readTypeRecord(Index); + TypesLoaded[Index] = readTypeRecord(ID); if (TypesLoaded[Index].isNull()) return QualType(); @@ -7392,27 +7388,28 @@ QualType ASTReader::GetType(TypeID ID) { return TypesLoaded[Index].withFastQualifiers(FastQuals); } -QualType ASTReader::getLocalType(ModuleFile &F, unsigned LocalID) { +QualType ASTReader::getLocalType(ModuleFile &F, TypeID LocalID) { return GetType(getGlobalTypeID(F, LocalID)); } -serialization::TypeID -ASTReader::getGlobalTypeID(ModuleFile &F, unsigned LocalID) const { - unsigned FastQuals = LocalID & Qualifiers::FastMask; - unsigned LocalIndex = LocalID >> Qualifiers::FastWidth; - - if (LocalIndex < NUM_PREDEF_TYPE_IDS) +serialization::TypeID ASTReader::getGlobalTypeID(ModuleFile &F, + TypeID LocalID) const { + if ((LocalID >> Qualifiers::FastWidth) < NUM_PREDEF_TYPE_IDS) return LocalID; if (!F.ModuleOffsetMap.empty()) ReadModuleOffsetMap(F); - ContinuousRangeMap<uint32_t, int, 2>::iterator I - = F.TypeRemap.find(LocalIndex - NUM_PREDEF_TYPE_IDS); - assert(I != F.TypeRemap.end() && "Invalid index into type index remap"); + unsigned ModuleFileIndex = LocalID >> 32; + LocalID &= llvm::maskTrailingOnes<TypeID>(32); - unsigned GlobalIndex = LocalIndex + I->second; - return (GlobalIndex << Qualifiers::FastWidth) | FastQuals; + if (ModuleFileIndex == 0) + LocalID -= NUM_PREDEF_TYPE_IDS << Qualifiers::FastWidth; + + ModuleFile &MF = + ModuleFileIndex ? *F.TransitiveImports[ModuleFileIndex - 1] : F; + ModuleFileIndex = MF.Index + 1; + return ((uint64_t)ModuleFileIndex << 32) | LocalID; } TemplateArgumentLocInfo @@ -7650,6 +7647,16 @@ ModuleFile *ASTReader::getOwningModuleFile(GlobalDeclID ID) const { return &getModuleManager()[ModuleFileIndex - 1]; } +ModuleFile *ASTReader::getOwningModuleFile(TypeID ID) const { + if (ID < NUM_PREDEF_TYPE_IDS) + return nullptr; + + uint64_t ModuleFileIndex = ID >> 32; + assert(ModuleFileIndex && "Untranslated Local Decl?"); + + return &getModuleManager()[ModuleFileIndex - 1]; +} + ModuleFile *ASTReader::getOwningModuleFile(const Decl *D) const { if (!D->isFromASTFile()) return nullptr; @@ -8167,7 +8174,6 @@ LLVM_DUMP_METHOD void ASTReader::dump() { llvm::errs() << "*** PCH/ModuleFile Remappings:\n"; dumpModuleIDMap("Global bit offset map", GlobalBitOffsetsMap); dumpModuleIDMap("Global source location entry map", GlobalSLocEntryMap); - dumpModuleIDMap("Global type map", GlobalTypeMap); dumpModuleIDMap("Global macro map", GlobalMacroMap); dumpModuleIDMap("Global submodule map", GlobalSubmoduleMap); dumpModuleIDMap("Global selector map", GlobalSelectorMap); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index dd6c135d54b46..4a20b97de55d3 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3262,17 +3262,18 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, /// Write the representation of a type to the AST stream. void ASTWriter::WriteType(QualType T) { TypeIdx &IdxRef = TypeIdxs[T]; - if (IdxRef.getIndex() == 0) // we haven't seen this type before. + if (IdxRef.getValue() == 0) // we haven't seen this type before. IdxRef = TypeIdx(NextTypeID++); TypeIdx Idx = IdxRef; - assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST"); + assert(Idx.getModuleFileIndex() == 0 && "Re-writing a type from a prior AST"); + assert(Idx.getValue() >= FirstTypeID && "Writing predefined type"); // Emit the type's representation. uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset; // Record the offset for this type. - unsigned Index = Idx.getIndex() - FirstTypeID; + uint64_t Index = Idx.getValue() - FirstTypeID; if (TypeOffsets.size() == Index) TypeOffsets.emplace_back(Offset); else if (TypeOffsets.size() < Index) { @@ -3345,12 +3346,10 @@ void ASTWriter::WriteTypeDeclOffsets() { auto Abbrev = std::make_shared<BitCodeAbbrev>(); Abbrev->Add(BitCodeAbbrevOp(TYPE_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of types - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // base type index Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // types block unsigned TypeOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { - RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size(), - FirstTypeID - NUM_PREDEF_TYPE_IDS}; + RecordData::value_type Record[] = {TYPE_OFFSET, TypeOffsets.size()}; Stream.EmitRecordWithBlob(TypeOffsetAbbrev, Record, bytes(TypeOffsets)); } @@ -5434,7 +5433,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, M.NumPreprocessedEntities); writeBaseIDOrNone(M.BaseSubmoduleID, M.LocalNumSubmodules); writeBaseIDOrNone(M.BaseSelectorID, M.LocalNumSelectors); - writeBaseIDOrNone(M.BaseTypeIndex, M.LocalNumTypes); } } RecordData::value_type Record[] = {MODULE_OFFSET_MAP}; @@ -6123,7 +6121,7 @@ TypeID ASTWriter::GetOrCreateTypeID(QualType T) { assert(!T.getLocalFastQualifiers()); TypeIdx &Idx = TypeIdxs[T]; - if (Idx.getIndex() == 0) { + if (Idx.getValue() == 0) { if (DoneWritingDeclsAndTypes) { assert(0 && "New type seen after serializing all the types to emit!"); return TypeIdx(); @@ -6626,11 +6624,9 @@ void ASTWriter::ReaderInitialized(ASTReader *Reader) { // Note, this will get called multiple times, once one the reader starts up // and again each time it's done reading a PCH or module. - FirstTypeID = NUM_PREDEF_TYPE_IDS + Chain->getTotalNumTypes(); FirstMacroID = NUM_PREDEF_MACRO_IDS + Chain->getTotalNumMacros(); FirstSubmoduleID = NUM_PREDEF_SUBMODULE_IDS + Chain->getTotalNumSubmodules(); FirstSelectorID = NUM_PREDEF_SELECTOR_IDS + Chain->getTotalNumSelectors(); - NextTypeID = FirstTypeID; NextMacroID = FirstMacroID; NextSelectorID = FirstSelectorID; NextSubmoduleID = FirstSubmoduleID; @@ -6659,13 +6655,22 @@ void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) { } void ASTWriter::TypeRead(TypeIdx Idx, QualType T) { - // Always take the highest-numbered type index. This copes with an interesting + // Always take the type index that comes in later module files. + // This copes with an interesting // case for chained AST writing where we schedule writing the type and then, // later, deserialize the type from another AST. In this case, we want to - // keep the higher-numbered entry so that we can properly write it out to + // keep the just writing entry so that we can properly write it out to // the AST file. TypeIdx &StoredIdx = TypeIdxs[T]; - if (Idx.getIndex() >= StoredIdx.getIndex()) + + // Ignore it if the type comes from the current being written module file. + unsigned ModuleFileIndex = StoredIdx.getModuleFileIndex(); + if (ModuleFileIndex == 0 && StoredIdx.getValue()) + return; + + // Otherwise, keep the highest ID since the module file comes later has + // higher module file indexes. + if (Idx.getValue() >= StoredIdx.getValue()) StoredIdx = Idx; } diff --git a/clang/lib/Serialization/ModuleFile.cpp b/clang/lib/Serialization/ModuleFile.cpp index 7976c28b28671..4858cdbda5545 100644 --- a/clang/lib/Serialization/ModuleFile.cpp +++ b/clang/lib/Serialization/ModuleFile.cpp @@ -84,7 +84,6 @@ LLVM_DUMP_METHOD void ModuleFile::dump() { llvm::errs() << " Base type index: " << BaseTypeIndex << '\n' << " Number of types: " << LocalNumTypes << '\n'; - dumpLocalRemap("Type index local -> global map", TypeRemap); llvm::errs() << " Base decl index: " << BaseDeclIndex << '\n' << " Number of decls: " << LocalNumDecls << '\n'; diff --git a/clang/test/Modules/no-transitive-decls-change.cppm b/clang/test/Modules/no-transitive-decls-change.cppm index 42ac061bc90b3..83594b09ea789 100644 --- a/clang/test/Modules/no-transitive-decls-change.cppm +++ b/clang/test/Modules/no-transitive-decls-change.cppm @@ -44,10 +44,6 @@ export inline int getA() { return 43; } -export inline int getA2(int) { - return 88; -} - //--- m-partA.v1.cppm export module m:partA; @@ -63,7 +59,6 @@ namespace A_Impl { namespace A { using A_Impl::getAImpl; - // Adding a new declaration without introducing a new declaration name. using A_Impl::getA2Impl; } @@ -71,14 +66,9 @@ inline int getA() { return 43; } -inline int getA2(int) { - return 88; -} - -// Now we add a new declaration without introducing new identifier and new types. // The consuming module which didn't use m:partA completely is expected to be // not changed. -inline int getA(int) { +inline int getB(int) { return 88; } diff --git a/clang/test/Modules/no-transitive-identifier-change.cppm b/clang/test/Modules/no-transitive-identifier-change.cppm index 97e8ac4444fdd..35b519236aaed 100644 --- a/clang/test/Modules/no-transitive-identifier-change.cppm +++ b/clang/test/Modules/no-transitive-identifier-change.cppm @@ -57,7 +57,6 @@ export inline int getA() { return 43; } -// Now we add a new declaration without introducing a new type. // The consuming module which didn't use m:partA completely is expected to be // not changed. export inline int getA2() { @@ -67,7 +66,6 @@ export inline int getA2() { export class A { public: int getMem(); - // Now we add a new declaration without introducing a new type. // The consuming module which didn't use m:partA completely is expected to be // not changed. int getMem2(); @@ -77,7 +75,6 @@ export template <typename T> class ATempl { public: T getT(); - // Add a new declaration without introducing a new type. T getT2(); }; diff --git a/clang/test/Modules/no-transitive-type-change.cppm b/clang/test/Modules/no-transitive-type-change.cppm new file mode 100644 index 0000000000000..4d5d42b7c042c --- /dev/null +++ b/clang/test/Modules/no-transitive-type-change.cppm @@ -0,0 +1,68 @@ +// Testing that changing a type in an unused module file won't change +// the BMI of the current module file. +// +// RUN: rm -rf %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/m-partA.cppm -emit-reduced-module-interface -o %t/m-partA.pcm +// RUN: %clang_cc1 -std=c++20 %t/m-partA.v1.cppm -emit-reduced-module-interface -o \ +// RUN: %t/m-partA.v1.pcm +// RUN: %clang_cc1 -std=c++20 %t/m-partB.cppm -emit-reduced-module-interface -o %t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ +// RUN: -fmodule-file=m:partA=%t/m-partA.pcm -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.v1.pcm \ +// RUN: -fmodule-file=m:partA=%t/m-partA.v1.pcm -fmodule-file=m:partB=%t/m-partB.pcm +// +// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.pcm \ +// RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.v1.pcm \ +// RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// Since useBOnly only uses partB from module M, the change in partA shouldn't affect +// useBOnly. +// RUN: diff %t/useBOnly.pcm %t/useBOnly.v1.pcm &> /dev/null + +//--- m-partA.cppm +export module m:partA; + +//--- m-partA.v1.cppm +export module m:partA; + +namespace NS { + class A { + public: + int getValue() { + return 43; + } + }; +} + +//--- m-partB.cppm +export module m:partB; + +export inline int getB() { + return 430; +} + +//--- m.cppm +export module m; +export import :partA; +export import :partB; + +//--- useBOnly.cppm +export module useBOnly; +import m; + +export inline int get() { + return getB(); +} + +//--- useAOnly.cppm +export module useAOnly; +import m; + +export inline int get() { + A<int> a; + return a.getValue(); +} diff --git a/clang/test/Modules/pr59999.cppm b/clang/test/Modules/pr59999.cppm index d6e6fff2b7c5e..5405091ad5291 100644 --- a/clang/test/Modules/pr59999.cppm +++ b/clang/test/Modules/pr59999.cppm @@ -12,16 +12,16 @@ // RUN: -fmodule-file=Module=%t/Module.pcm -emit-llvm -o - | FileCheck %t/Object.cppm // Test again with reduced BMI. -// RUN: rm -rf %t -// RUN: mkdir -p %t -// RUN: split-file %s %t +// RUNX: rm -rf %t +// RUNX: mkdir -p %t +// RUNX: split-file %s %t // -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Module.cppm \ -// RUN: -emit-reduced-module-interface -o %t/Module.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.cppm \ -// RUN: -fmodule-file=Module=%t/Module.pcm -emit-module-interface -o %t/Object.pcm -// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ -// RUN: -fmodule-file=Module=%t/Module.pcm -emit-llvm -o - | FileCheck %t/Object.cppm +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Module.cppm \ +// RUNX: -emit-reduced-module-interface -o %t/Module.pcm +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.cppm \ +// RUNX: -fmodule-file=Module=%t/Module.pcm -emit-module-interface -o %t/Object.pcm +// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ +// RUNX: -fmodule-file=Module=%t/Module.pcm -emit-llvm -o - | FileCheck %t/Object.cppm //--- Module.cppm @@ -29,27 +29,27 @@ export module Module; export template <class ObjectType> bool ModuleRegister() { return true; }; -export struct ModuleEntry { - static const bool bRegistered; -}; +// export struct ModuleEntry { +// static const bool bRegistered; +// }; -const bool ModuleEntry::bRegistered = ModuleRegister<ModuleEntry>(); +// const bool ModuleEntry::bRegistered = ModuleRegister<ModuleEntry>(); //--- Object.cppm export module Object; import Module; -export template <class ObjectType> bool ObjectRegister() { return true; } -export struct NObject { - static const bool bRegistered; -}; +// export template <class ObjectType> bool ObjectRegister() { return true; } +// export struct NObject { +// static const bool bRegistered; +// }; export struct ObjectModuleEntry { static const bool bRegistered; }; // This function is also required for crash -const bool NObject::bRegistered = ObjectRegister<NObject>(); +// const bool NObject::bRegistered = ObjectRegister<NObject>(); // One another function, that helps clang crash const bool ObjectModuleEntry::bRegistered = ModuleRegister<ObjectModuleEntry>(); >From 7dad94ddaf7ab62d7ba5305b3786a67903aea236 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Thu, 20 Jun 2024 11:06:24 +0800 Subject: [PATCH 3/5] Update --- .../include/clang/Serialization/ASTBitCodes.h | 12 +++-- clang/include/clang/Serialization/ASTReader.h | 4 +- clang/lib/Serialization/ASTCommon.cpp | 2 +- clang/lib/Serialization/ASTReader.cpp | 45 +++++++++++-------- clang/lib/Serialization/ASTWriter.cpp | 13 +++--- clang/test/Modules/pr59999.cppm | 36 +++++++-------- 6 files changed, 61 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 13ee1e4724f1e..a8ab8dd3c7acf 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -72,11 +72,10 @@ using DeclID = DeclIDBase::DeclID; /// An ID number that refers to a type in an AST file. /// /// The ID of a type is partitioned into three parts: -/// - the lower -/// three bits are used to store the const/volatile/restrict -/// qualifiers (as with QualType). -/// - the upper 29 bits provide a type index in the corresponding -/// module file. +/// - the lower three bits are used to store the const/volatile/restrict +/// qualifiers (as with QualType). +/// - the next 29 bits provide a type index in the corresponding +/// module file. /// - the upper 32 bits provide a module file index. /// /// The type index values are partitioned into two @@ -95,7 +94,6 @@ class TypeIdx { public: TypeIdx() = default; - explicit TypeIdx(uint32_t Idx) : ModuleFileIndex(0), Idx(Idx) {} explicit TypeIdx(uint32_t ModuleFileIdx, uint32_t Idx) : ModuleFileIndex(ModuleFileIdx), Idx(Idx) {} @@ -114,7 +112,7 @@ class TypeIdx { static TypeIdx fromTypeID(TypeID ID) { if (ID == TypeID(-1)) - return TypeIdx(-1); + return TypeIdx(0, -1); return TypeIdx(ID >> 32, (ID & llvm::maskTrailingOnes<TypeID>(32)) >> Qualifiers::FastWidth); diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index f4180984eaad3..aacbc37263173 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1881,6 +1881,9 @@ class ASTReader QualType GetType(serialization::TypeID ID); /// Resolve a local type ID within a given AST file into a type. + /// + /// A local type ID is only meaningful with the corresponding module file. + /// See the implementation of getGlobalTypeID for details. QualType getLocalType(ModuleFile &F, serialization::TypeID LocalID); /// Map a local type ID within a given AST file into a global type ID. @@ -1907,7 +1910,6 @@ class ASTReader /// if the declaration is not from a module file. ModuleFile *getOwningModuleFile(const Decl *D) const; ModuleFile *getOwningModuleFile(GlobalDeclID ID) const; - ModuleFile *getOwningModuleFile(serialization::TypeID ID) const; /// Returns the source location for the decl \p ID. SourceLocation getSourceLocationForDeclID(GlobalDeclID ID); diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp index bc662a87a7bf3..e30ceec92ce9b 100644 --- a/clang/lib/Serialization/ASTCommon.cpp +++ b/clang/lib/Serialization/ASTCommon.cpp @@ -278,7 +278,7 @@ serialization::TypeIdxFromBuiltin(const BuiltinType *BT) { break; } - return TypeIdx(ID); + return TypeIdx(0, ID); } unsigned serialization::ComputeHash(Selector Sel) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 9d05037b89392..5df81bfe3e92f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7084,15 +7084,34 @@ TypeSourceInfo *ASTRecordReader::readTypeSourceInfo() { return TInfo; } +static unsigned getIndexForTypeID(serialization::TypeID ID) { + return (ID & llvm::maskTrailingOnes<TypeID>(32)) >> Qualifiers::FastWidth; + ; +} + +static unsigned getModuleFileIndexForTypeID(serialization::TypeID ID) { + return ID >> 32; +} + +static bool isPredefinedTypes(serialization::TypeID ID) { + // We don't need to erase the higher bits since if these bits are not 0, + // it must be larger than NUM_PREDEF_TYPE_IDS. + return (ID >> Qualifiers::FastWidth) < NUM_PREDEF_TYPE_IDS; +} + std::pair<ModuleFile *, unsigned> ASTReader::translateTypeIDToIndex(serialization::TypeID ID) const { - unsigned Index = - (ID & llvm::maskTrailingOnes<TypeID>(32)) >> Qualifiers::FastWidth; + assert(!isPredefinedTypes(ID) && + "Predefined type shouldn't be in TypesLoaded"); + unsigned ModuleFileIndex = getModuleFileIndexForTypeID(ID); + assert(ModuleFileIndex && "Untranslated Local Decl?"); - ModuleFile *OwningModuleFile = getOwningModuleFile(ID); + ModuleFile *OwningModuleFile = &getModuleManager()[ModuleFileIndex - 1]; assert(OwningModuleFile && "untranslated type ID or local type ID shouldn't be in TypesLoaded"); - return {OwningModuleFile, OwningModuleFile->BaseTypeIndex + Index}; + + return {OwningModuleFile, + OwningModuleFile->BaseTypeIndex + getIndexForTypeID(ID)}; } QualType ASTReader::GetType(TypeID ID) { @@ -7101,9 +7120,9 @@ QualType ASTReader::GetType(TypeID ID) { unsigned FastQuals = ID & Qualifiers::FastMask; - if (uint64_t Index = ID >> Qualifiers::FastWidth; - Index < NUM_PREDEF_TYPE_IDS) { + if (isPredefinedTypes(ID)) { QualType T; + unsigned Index = getIndexForTypeID(ID); switch ((PredefinedTypeIDs)Index) { case PREDEF_TYPE_LAST_ID: // We should never use this one. @@ -7394,13 +7413,13 @@ QualType ASTReader::getLocalType(ModuleFile &F, TypeID LocalID) { serialization::TypeID ASTReader::getGlobalTypeID(ModuleFile &F, TypeID LocalID) const { - if ((LocalID >> Qualifiers::FastWidth) < NUM_PREDEF_TYPE_IDS) + if (isPredefinedTypes(LocalID)) return LocalID; if (!F.ModuleOffsetMap.empty()) ReadModuleOffsetMap(F); - unsigned ModuleFileIndex = LocalID >> 32; + unsigned ModuleFileIndex = getModuleFileIndexForTypeID(LocalID); LocalID &= llvm::maskTrailingOnes<TypeID>(32); if (ModuleFileIndex == 0) @@ -7647,16 +7666,6 @@ ModuleFile *ASTReader::getOwningModuleFile(GlobalDeclID ID) const { return &getModuleManager()[ModuleFileIndex - 1]; } -ModuleFile *ASTReader::getOwningModuleFile(TypeID ID) const { - if (ID < NUM_PREDEF_TYPE_IDS) - return nullptr; - - uint64_t ModuleFileIndex = ID >> 32; - assert(ModuleFileIndex && "Untranslated Local Decl?"); - - return &getModuleManager()[ModuleFileIndex - 1]; -} - ModuleFile *ASTReader::getOwningModuleFile(const Decl *D) const { if (!D->isFromASTFile()) return nullptr; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4a20b97de55d3..e425ce4e6db85 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3263,7 +3263,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, void ASTWriter::WriteType(QualType T) { TypeIdx &IdxRef = TypeIdxs[T]; if (IdxRef.getValue() == 0) // we haven't seen this type before. - IdxRef = TypeIdx(NextTypeID++); + IdxRef = TypeIdx(0, NextTypeID++); TypeIdx Idx = IdxRef; assert(Idx.getModuleFileIndex() == 0 && "Re-writing a type from a prior AST"); @@ -6106,9 +6106,9 @@ static TypeID MakeTypeID(ASTContext &Context, QualType T, return TypeIdxFromBuiltin(BT).asTypeID(FastQuals); if (T == Context.AutoDeductTy) - return TypeIdx(PREDEF_TYPE_AUTO_DEDUCT).asTypeID(FastQuals); + return TypeIdx(0, PREDEF_TYPE_AUTO_DEDUCT).asTypeID(FastQuals); if (T == Context.AutoRRefDeductTy) - return TypeIdx(PREDEF_TYPE_AUTO_RREF_DEDUCT).asTypeID(FastQuals); + return TypeIdx(0, PREDEF_TYPE_AUTO_RREF_DEDUCT).asTypeID(FastQuals); return IdxForType(T).asTypeID(FastQuals); } @@ -6129,7 +6129,7 @@ TypeID ASTWriter::GetOrCreateTypeID(QualType T) { // We haven't seen this type before. Assign it a new ID and put it // into the queue of types to emit. - Idx = TypeIdx(NextTypeID++); + Idx = TypeIdx(0, NextTypeID++); DeclTypesToEmit.push(T); } return Idx; @@ -6659,18 +6659,19 @@ void ASTWriter::TypeRead(TypeIdx Idx, QualType T) { // This copes with an interesting // case for chained AST writing where we schedule writing the type and then, // later, deserialize the type from another AST. In this case, we want to - // keep the just writing entry so that we can properly write it out to + // keep the entry from a later module so that we can properly write it out to // the AST file. TypeIdx &StoredIdx = TypeIdxs[T]; // Ignore it if the type comes from the current being written module file. + // Since the current being written module file unsigned ModuleFileIndex = StoredIdx.getModuleFileIndex(); if (ModuleFileIndex == 0 && StoredIdx.getValue()) return; // Otherwise, keep the highest ID since the module file comes later has // higher module file indexes. - if (Idx.getValue() >= StoredIdx.getValue()) + if (Idx.getModuleFileIndex() >= StoredIdx.getModuleFileIndex()) StoredIdx = Idx; } diff --git a/clang/test/Modules/pr59999.cppm b/clang/test/Modules/pr59999.cppm index 5405091ad5291..d6e6fff2b7c5e 100644 --- a/clang/test/Modules/pr59999.cppm +++ b/clang/test/Modules/pr59999.cppm @@ -12,16 +12,16 @@ // RUN: -fmodule-file=Module=%t/Module.pcm -emit-llvm -o - | FileCheck %t/Object.cppm // Test again with reduced BMI. -// RUNX: rm -rf %t -// RUNX: mkdir -p %t -// RUNX: split-file %s %t +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t // -// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Module.cppm \ -// RUNX: -emit-reduced-module-interface -o %t/Module.pcm -// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.cppm \ -// RUNX: -fmodule-file=Module=%t/Module.pcm -emit-module-interface -o %t/Object.pcm -// RUNX: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ -// RUNX: -fmodule-file=Module=%t/Module.pcm -emit-llvm -o - | FileCheck %t/Object.cppm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Module.cppm \ +// RUN: -emit-reduced-module-interface -o %t/Module.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.cppm \ +// RUN: -fmodule-file=Module=%t/Module.pcm -emit-module-interface -o %t/Object.pcm +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple %t/Object.pcm \ +// RUN: -fmodule-file=Module=%t/Module.pcm -emit-llvm -o - | FileCheck %t/Object.cppm //--- Module.cppm @@ -29,27 +29,27 @@ export module Module; export template <class ObjectType> bool ModuleRegister() { return true; }; -// export struct ModuleEntry { -// static const bool bRegistered; -// }; +export struct ModuleEntry { + static const bool bRegistered; +}; -// const bool ModuleEntry::bRegistered = ModuleRegister<ModuleEntry>(); +const bool ModuleEntry::bRegistered = ModuleRegister<ModuleEntry>(); //--- Object.cppm export module Object; import Module; -// export template <class ObjectType> bool ObjectRegister() { return true; } -// export struct NObject { -// static const bool bRegistered; -// }; +export template <class ObjectType> bool ObjectRegister() { return true; } +export struct NObject { + static const bool bRegistered; +}; export struct ObjectModuleEntry { static const bool bRegistered; }; // This function is also required for crash -// const bool NObject::bRegistered = ObjectRegister<NObject>(); +const bool NObject::bRegistered = ObjectRegister<NObject>(); // One another function, that helps clang crash const bool ObjectModuleEntry::bRegistered = ModuleRegister<ObjectModuleEntry>(); >From 381d5137fa051a64fd1ec831806f9fe23b02398f Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Thu, 20 Jun 2024 18:53:27 +0800 Subject: [PATCH 4/5] Update --- clang/include/clang/Serialization/ASTBitCodes.h | 14 ++++++++++++-- clang/include/clang/Serialization/ASTReader.h | 7 ++----- clang/lib/Serialization/ASTReader.cpp | 13 ++++++------- clang/lib/Serialization/ASTWriter.cpp | 2 +- .../test/Modules/no-transitive-type-change.cppm | 16 ++++++++++++++-- 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 842148fe53918..011231d907d75 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -81,9 +81,17 @@ using DeclID = DeclIDBase::DeclID; /// The type index values are partitioned into two /// sets. The values below NUM_PREDEF_TYPE_IDs are predefined type /// IDs (based on the PREDEF_TYPE_*_ID constants), with 0 as a -/// placeholder for "no type". Values from NUM_PREDEF_TYPE_IDs are -/// other types that have serialized representations. +/// placeholder for "no type". The module file index for predefined +/// types are always 0 since they don't belong to any modules. +/// Values from NUM_PREDEF_TYPE_IDs are other types that have +/// serialized representations. using TypeID = uint64_t; +/// Same with TypeID except that the LocalTypeID is only meaningful +/// with the corresponding ModuleFile. +/// +/// FIXME: Make TypeID and LocalTypeID a class to improve the type +/// safety. +using LocalTypeID = TypeID; /// A type index; the type ID with the qualifier bits removed. /// Keep structure alignment 32-bit since the blob is assumed as 32-bit @@ -119,6 +127,8 @@ class TypeIdx { } }; +static_assert(alignof(TypeIdx) == 4); + /// A structure for putting "fast"-unqualified QualTypes into a /// DenseMap. This uses the standard pointer hash function. struct UnsafeQualTypeDenseMapInfo { diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 3ea709120a018..b26f4b4f9a654 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1889,14 +1889,11 @@ class ASTReader QualType GetType(serialization::TypeID ID); /// Resolve a local type ID within a given AST file into a type. - /// - /// A local type ID is only meaningful with the corresponding module file. - /// See the implementation of getGlobalTypeID for details. - QualType getLocalType(ModuleFile &F, serialization::TypeID LocalID); + QualType getLocalType(ModuleFile &F, serialization::LocalTypeID LocalID); /// Map a local type ID within a given AST file into a global type ID. serialization::TypeID getGlobalTypeID(ModuleFile &F, - serialization::TypeID LocalID) const; + serialization::LocalTypeID LocalID) const; /// Read a type from the current position in the given record, which /// was read from the given AST file. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 703fd4e1fb8af..552a3af546e75 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7137,14 +7137,13 @@ TypeSourceInfo *ASTRecordReader::readTypeSourceInfo() { static unsigned getIndexForTypeID(serialization::TypeID ID) { return (ID & llvm::maskTrailingOnes<TypeID>(32)) >> Qualifiers::FastWidth; - ; } static unsigned getModuleFileIndexForTypeID(serialization::TypeID ID) { return ID >> 32; } -static bool isPredefinedTypes(serialization::TypeID ID) { +static bool isPredefinedType(serialization::TypeID ID) { // We don't need to erase the higher bits since if these bits are not 0, // it must be larger than NUM_PREDEF_TYPE_IDS. return (ID >> Qualifiers::FastWidth) < NUM_PREDEF_TYPE_IDS; @@ -7152,7 +7151,7 @@ static bool isPredefinedTypes(serialization::TypeID ID) { std::pair<ModuleFile *, unsigned> ASTReader::translateTypeIDToIndex(serialization::TypeID ID) const { - assert(!isPredefinedTypes(ID) && + assert(!isPredefinedType(ID) && "Predefined type shouldn't be in TypesLoaded"); unsigned ModuleFileIndex = getModuleFileIndexForTypeID(ID); assert(ModuleFileIndex && "Untranslated Local Decl?"); @@ -7171,7 +7170,7 @@ QualType ASTReader::GetType(TypeID ID) { unsigned FastQuals = ID & Qualifiers::FastMask; - if (isPredefinedTypes(ID)) { + if (isPredefinedType(ID)) { QualType T; unsigned Index = getIndexForTypeID(ID); switch ((PredefinedTypeIDs)Index) { @@ -7463,13 +7462,13 @@ QualType ASTReader::GetType(TypeID ID) { return TypesLoaded[Index].withFastQualifiers(FastQuals); } -QualType ASTReader::getLocalType(ModuleFile &F, TypeID LocalID) { +QualType ASTReader::getLocalType(ModuleFile &F, LocalTypeID LocalID) { return GetType(getGlobalTypeID(F, LocalID)); } serialization::TypeID ASTReader::getGlobalTypeID(ModuleFile &F, - TypeID LocalID) const { - if (isPredefinedTypes(LocalID)) + LocalTypeID LocalID) const { + if (isPredefinedType(LocalID)) return LocalID; if (!F.ModuleOffsetMap.empty()) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index f86cf67d7d916..810bd41cdde46 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6696,7 +6696,7 @@ void ASTWriter::TypeRead(TypeIdx Idx, QualType T) { TypeIdx &StoredIdx = TypeIdxs[T]; // Ignore it if the type comes from the current being written module file. - // Since the current being written module file + // Since the current module file being written logically has the highest index. unsigned ModuleFileIndex = StoredIdx.getModuleFileIndex(); if (ModuleFileIndex == 0 && StoredIdx.getValue()) return; diff --git a/clang/test/Modules/no-transitive-type-change.cppm b/clang/test/Modules/no-transitive-type-change.cppm index 4d5d42b7c042c..9ec72e851b17d 100644 --- a/clang/test/Modules/no-transitive-type-change.cppm +++ b/clang/test/Modules/no-transitive-type-change.cppm @@ -22,13 +22,26 @@ // Since useBOnly only uses partB from module M, the change in partA shouldn't affect // useBOnly. // RUN: diff %t/useBOnly.pcm %t/useBOnly.v1.pcm &> /dev/null +// +// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.pcm \ +// RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// RUN: %clang_cc1 -std=c++20 %t/useAOnly.cppm -emit-reduced-module-interface -o %t/useAOnly.v1.pcm \ +// RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \ +// RUN: -fmodule-file=m:partB=%t/m-partB.pcm +// Since useAOnly uses partA from module M, the change in partA should affect useAOnly. +// RUN: not diff %t/useAOnly.pcm %t/useAOnly.v1.pcm &> /dev/null //--- m-partA.cppm export module m:partA; +export int getValueFromA() { return 43; } + //--- m-partA.v1.cppm export module m:partA; +export int getValueFromA() { return 43; } + namespace NS { class A { public: @@ -63,6 +76,5 @@ export module useAOnly; import m; export inline int get() { - A<int> a; - return a.getValue(); + return getValueFromA(); } >From 011abc2a20c24c4c802a37c9be1173c11981e67b Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Thu, 20 Jun 2024 18:53:43 +0800 Subject: [PATCH 5/5] update --- clang/include/clang/Serialization/ASTReader.h | 4 ++-- clang/lib/Serialization/ASTWriter.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b26f4b4f9a654..f41c473c97cd9 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1892,8 +1892,8 @@ class ASTReader QualType getLocalType(ModuleFile &F, serialization::LocalTypeID LocalID); /// Map a local type ID within a given AST file into a global type ID. - serialization::TypeID getGlobalTypeID(ModuleFile &F, - serialization::LocalTypeID LocalID) const; + serialization::TypeID + getGlobalTypeID(ModuleFile &F, serialization::LocalTypeID LocalID) const; /// Read a type from the current position in the given record, which /// was read from the given AST file. diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 810bd41cdde46..0297e20e9116f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6696,7 +6696,8 @@ void ASTWriter::TypeRead(TypeIdx Idx, QualType T) { TypeIdx &StoredIdx = TypeIdxs[T]; // Ignore it if the type comes from the current being written module file. - // Since the current module file being written logically has the highest index. + // Since the current module file being written logically has the highest + // index. unsigned ModuleFileIndex = StoredIdx.getModuleFileIndex(); if (ModuleFileIndex == 0 && StoredIdx.getValue()) return; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits