simon_tatham created this revision. simon_tatham added reviewers: rsmith, lebedev.ri, akyrtzi. simon_tatham requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
This is part of a patch series working towards making SourceLocation a 64-bit type, so that clang can handle larger translation units.

Unconditionally expanding the serialized encoding to 64 bits is the simplest way to ensure that it will be big enough to hold an expanded SourceLocation.

As I understand it, PCH files are already automatically versioned with the exact revision of the compiler that built them, so this shouldn't introduce any compatibility problem: clang will already refuse to load any PCH produced by a different version of itself.

Patch based on previous work by Mikhail Maltsev.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D105497

Files:
  clang/include/clang/Serialization/ASTReader.h
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTWriter.cpp
Index: clang/lib/Serialization/ASTWriter.cpp =================================================================== --- clang/lib/Serialization/ASTWriter.cpp +++ clang/lib/Serialization/ASTWriter.cpp @@ -4636,21 +4636,29 @@ LE.write<uint16_t>(Name.size()); Out.write(Name.data(), Name.size()); - // Note: if a base ID was uint max, it would not be possible to load - // another module after it or have more than one entity inside it. - uint32_t None = std::numeric_limits<uint32_t>::max(); - auto writeBaseIDOrNone = [&](auto BaseID, bool ShouldWrite) { - assert(BaseID < std::numeric_limits<uint32_t>::max() && "base id too high"); + using T = decltype(BaseID); + static_assert(std::is_same<T, uint32_t>::value || + std::is_same<T, uint64_t>::value, + "Invalid BaseID type"); + assert(BaseID < std::numeric_limits<T>::max() && "base id too high"); if (ShouldWrite) - LE.write<uint32_t>(BaseID); - else - LE.write<uint32_t>(None); + LE.write<T>(BaseID); + else { + // Note: if a base ID was uint max, it would not be possible to load + // another module after it or have more than one entity inside it. + constexpr T None = std::numeric_limits<T>::max(); + LE.write<T>(None); + } }; // These values should be unique within a chain, since they will be read // as keys into ContinuousRangeMaps. 
- writeBaseIDOrNone(M.SLocEntryBaseOffset, M.LocalNumSLocEntries); + + // SourceLocations are serialized as uint64_t, irrespective of + // CLANG_64_BIT_SOURCE_LOCATIONS + writeBaseIDOrNone(static_cast<uint64_t>(M.SLocEntryBaseOffset), + M.LocalNumSLocEntries); writeBaseIDOrNone(M.BaseIdentifierID, M.LocalNumIdentifiers); writeBaseIDOrNone(M.BaseMacroID, M.LocalNumMacros); writeBaseIDOrNone(M.BasePreprocessedEntityID, Index: clang/lib/Serialization/ASTReader.cpp =================================================================== --- clang/lib/Serialization/ASTReader.cpp +++ clang/lib/Serialization/ASTReader.cpp @@ -3868,8 +3868,7 @@ return; } - SourceLocation::UIntType SLocOffset = - endian::readNext<uint32_t, little, unaligned>(Data); + uint64_t SLocOffset = endian::readNext<uint64_t, little, unaligned>(Data); uint32_t IdentifierIDOffset = endian::readNext<uint32_t, little, unaligned>(Data); uint32_t MacroIDOffset = @@ -3885,6 +3884,15 @@ uint32_t TypeIndexOffset = endian::readNext<uint32_t, little, unaligned>(Data); + // SourceLocations are serialized as uint64_t, irrespective of + // CLANG_64_BIT_SOURCE_LOCATIONS. So we have to bounds-check them + // when reading back in. + if (SLocOffset > std::numeric_limits<SourceLocation::UIntType>::max()) { + Error("This version of clang cannot handle SourceLocations bigger than " + "32 bits"); + return; + } + auto mapOffset = [&](uint32_t Offset, uint32_t BaseOffset, RemapBuilder &Remap) { constexpr uint32_t None = std::numeric_limits<uint32_t>::max(); Index: clang/include/clang/Serialization/ASTReader.h =================================================================== --- clang/include/clang/Serialization/ASTReader.h +++ clang/include/clang/Serialization/ASTReader.h @@ -781,7 +781,7 @@ /// /// CodeGen has to emit VTables for these records, so they have to be eagerly /// deserialized. 
- SmallVector<serialization::DeclID, 64> VTableUses; + SmallVector<uint64_t, 64> VTableUses; /// A snapshot of the pending instantiations in the chain. /// @@ -789,7 +789,7 @@ /// end of the TU. It consists of a pair of values for every pending /// instantiation where the first value is the ID of the decl and the second /// is the instantiation location. - SmallVector<serialization::DeclID, 64> PendingInstantiations; + SmallVector<uint64_t, 64> PendingInstantiations; //@} @@ -807,11 +807,11 @@ /// Method selectors used in a @selector expression. Used for /// implementation of -Wselector. - SmallVector<serialization::SelectorID, 64> ReferencedSelectorsData; + SmallVector<uint64_t, 64> ReferencedSelectorsData; /// A snapshot of Sema's weak undeclared identifier tracking, for /// generating warnings. - SmallVector<serialization::IdentifierID, 64> WeakUndeclaredIdentifiers; + SmallVector<uint64_t, 64> WeakUndeclaredIdentifiers; /// The IDs of type aliases for ext_vectors that exist in the chain. /// @@ -900,7 +900,7 @@ /// A list of undefined decls with internal linkage followed by the /// SourceLocation of a matching ODR-use. - SmallVector<serialization::DeclID, 8> UndefinedButUsed; + SmallVector<uint64_t, 8> UndefinedButUsed; /// Delete expressions to analyze at the end of translation unit. SmallVector<uint64_t, 8> DelayedDeleteExprs;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits