dang updated this revision to Diff 269155.
dang marked an inline comment as done.
dang added a comment.
Address some code review feedback
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D80383/new/
https://reviews.llvm.org/D80383
Files:
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTWriter.h
clang/include/clang/Serialization/ModuleFile.h
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTReaderDecl.cpp
clang/lib/Serialization/ASTWriter.cpp
clang/lib/Serialization/ASTWriterDecl.cpp
clang/test/Modules/ASTSignature.c
clang/test/Modules/Inputs/ASTHash/module.modulemap
clang/test/Modules/Inputs/ASTHash/my_header_1.h
clang/test/Modules/Inputs/ASTHash/my_header_2.h
Index: clang/test/Modules/Inputs/ASTHash/my_header_2.h
===================================================================
--- /dev/null
+++ clang/test/Modules/Inputs/ASTHash/my_header_2.h
@@ -0,0 +1,3 @@
+#include "my_header_1.h"
+
+extern my_int var;
Index: clang/test/Modules/Inputs/ASTHash/my_header_1.h
===================================================================
--- /dev/null
+++ clang/test/Modules/Inputs/ASTHash/my_header_1.h
@@ -0,0 +1 @@
+typedef int my_int;
Index: clang/test/Modules/Inputs/ASTHash/module.modulemap
===================================================================
--- /dev/null
+++ clang/test/Modules/Inputs/ASTHash/module.modulemap
@@ -0,0 +1,8 @@
+module MyHeader1 {
+ header "my_header_1.h"
+}
+
+module MyHeader2 {
+ header "my_header_2.h"
+ export *
+}
Index: clang/test/Modules/ASTSignature.c
===================================================================
--- /dev/null
+++ clang/test/Modules/ASTSignature.c
@@ -0,0 +1,24 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \
+// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \
+// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
+// RUN: cp %t/MyHeader2.pcm %t1.pcm
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -iquote "/dev/null" -iquote %S/Inputs/ASTHash/ -fsyntax-only \
+// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \
+// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s
+// RUN: cp %t/MyHeader2.pcm %t2.pcm
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump
+// RUN: cat %t1.dump %t2.dump | FileCheck %s
+
+#include "my_header_2.h"
+
+my_int var = 42;
+
+// CHECK: [[AST_BLOCK_HASH:<AST_BLOCK_HASH .*>]]
+// CHECK: [[SIGNATURE:<SIGNATURE .*>]]
+// CHECK: [[AST_BLOCK_HASH]]
+// CHECK-NOT: [[SIGNATURE]]
+// The modules built by this test are designed to yield the same AST. If this
+// test fails, it means that the AST block is has become non-relocatable.
Index: clang/lib/Serialization/ASTWriterDecl.cpp
===================================================================
--- clang/lib/Serialization/ASTWriterDecl.cpp
+++ clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2434,12 +2434,12 @@
SourceLocation Loc = D->getLocation();
unsigned Index = ID - FirstDeclID;
if (DeclOffsets.size() == Index)
- DeclOffsets.emplace_back(Loc, Offset);
+ DeclOffsets.emplace_back(Loc, Offset, ASTBlockRange.first);
else if (DeclOffsets.size() < Index) {
// FIXME: Can/should this happen?
DeclOffsets.resize(Index+1);
DeclOffsets[Index].setLocation(Loc);
- DeclOffsets[Index].setBitOffset(Offset);
+ DeclOffsets[Index].setBitOffset(Offset, ASTBlockRange.first);
} else {
llvm_unreachable("declarations should be emitted in ID order");
}
Index: clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -10,14 +10,12 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/AST/OpenMPClause.h"
-#include "clang/Serialization/ASTRecordWriter.h"
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "MultiOnDiskHashTable.h"
-#include "clang/AST/AbstractTypeWriter.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTUnresolvedSet.h"
+#include "clang/AST/AbstractTypeWriter.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
@@ -31,6 +29,7 @@
#include "clang/AST/ExprCXX.h"
#include "clang/AST/LambdaCapture.h"
#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
@@ -65,7 +64,9 @@
#include "clang/Sema/ObjCMethodList.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Weak.h"
+#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ASTReader.h"
+#include "clang/Serialization/ASTRecordWriter.h"
#include "clang/Serialization/InMemoryModuleCache.h"
#include "clang/Serialization/ModuleFile.h"
#include "clang/Serialization/ModuleFileExtension.h"
@@ -961,6 +962,7 @@
BLOCK(UNHASHED_CONTROL_BLOCK);
RECORD(SIGNATURE);
+ RECORD(AST_BLOCK_HASH);
RECORD(DIAGNOSTIC_OPTIONS);
RECORD(DIAG_PRAGMA_MAPPINGS);
@@ -1026,13 +1028,23 @@
return Filename + Pos;
}
-ASTFileSignature ASTWriter::createSignature(StringRef Bytes) {
- // Calculate the hash till start of UNHASHED_CONTROL_BLOCK.
+std::pair<ASTFileSignature, ASTFileSignature>
+ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
llvm::SHA1 Hasher;
- Hasher.update(ArrayRef<uint8_t>(Bytes.bytes_begin(), Bytes.size()));
+ Hasher.update(ASTBlockBytes);
auto Hash = Hasher.result();
+ ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hash);
+
+ // Add the remaing bytes (i.e. bytes before the unhashed control block that
+ // are not part of the AST block)
+ Hasher.update(
+ AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
+ Hasher.update(
+ AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
+ Hash = Hasher.result();
+ ASTFileSignature Signature = ASTFileSignature::create(Hash);
- return ASTFileSignature::create(Hash);
+ return std::make_pair(ASTBlockHash, Signature);
}
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
@@ -1049,7 +1061,16 @@
ASTFileSignature Signature;
if (WritingModule &&
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
- Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl));
+ ASTFileSignature ASTBlockHash;
+ auto ASTBlockStartByte = ASTBlockRange.first >> 3;
+ auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
+ std::tie(ASTBlockHash, Signature) = createSignature(
+ StringRef(Buffer.begin(), StartOfUnhashedControl),
+ StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
+
+ Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
+ Stream.EmitRecord(AST_BLOCK_HASH, Record);
+ Record.clear();
Record.append(Signature.begin(), Signature.end());
Stream.EmitRecord(SIGNATURE, Record);
Record.clear();
@@ -2041,7 +2062,7 @@
RecordData::value_type Record[] = {
SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(),
SourceMgr.getNextLocalOffset() - 1 /* skip dummy */,
- SLocEntryOffsetsBase};
+ SLocEntryOffsetsBase - ASTBlockRange.first};
Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record,
bytes(SLocEntryOffsets));
}
@@ -2320,7 +2341,7 @@
{
RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(),
FirstMacroID - NUM_PREDEF_MACRO_IDS,
- MacroOffsetsBase};
+ MacroOffsetsBase - ASTBlockRange.first};
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
}
}
@@ -2834,7 +2855,7 @@
assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST");
// Emit the type's representation.
- uint64_t Offset = ASTTypeWriter(*this).write(T);
+ uint64_t Offset = ASTTypeWriter(*this).write(T) - ASTBlockRange.first;
// Record the offset for this type.
unsigned Index = Idx.getIndex() - FirstTypeID;
@@ -4544,6 +4565,8 @@
WriteControlBlock(PP, Context, isysroot, OutputFile);
// Write the remaining AST contents.
+ Stream.FlushToWord();
+ ASTBlockRange.first = Stream.GetCurrentBitNo();
Stream.EnterSubblock(AST_BLOCK_ID, 5);
// This is so that older clang versions, before the introduction
@@ -4675,9 +4698,9 @@
// c++-base-specifiers-id:i32
// type-id:i32)
//
- // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or
- // MK_ExplicitModule, then the module-name is the module name. Otherwise,
- // it is the module file name.
+ // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule,
+ // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the
+ // module name. Otherwise, it is the module file name.
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
@@ -4690,10 +4713,7 @@
endian::Writer LE(Out, little);
LE.write<uint8_t>(static_cast<uint8_t>(M.Kind));
- StringRef Name =
- M.Kind == MK_PrebuiltModule || M.Kind == MK_ExplicitModule
- ? M.ModuleName
- : M.FileName;
+ StringRef Name = M.isModule() ? M.ModuleName : M.FileName;
LE.write<uint16_t>(Name.size());
Out.write(Name.data(), Name.size());
@@ -4905,6 +4925,7 @@
NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts};
Stream.EmitRecord(STATISTICS, Record);
Stream.ExitBlock();
+ ASTBlockRange.second = Stream.GetCurrentBitNo();
// Write the module file extension blocks.
for (const auto &ExtWriter : ModuleFileExtensionWriters)
Index: clang/lib/Serialization/ASTReaderDecl.cpp
===================================================================
--- clang/lib/Serialization/ASTReaderDecl.cpp
+++ clang/lib/Serialization/ASTReaderDecl.cpp
@@ -2869,7 +2869,7 @@
const DeclOffset &DOffs =
M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
Loc = TranslateSourceLocation(*M, DOffs.getLocation());
- return RecordLocation(M, DOffs.getBitOffset());
+ return RecordLocation(M, DOffs.getBitOffset(M->ASTBlockBitOffset));
}
ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) {
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -3377,7 +3377,7 @@
F.SLocEntryOffsets = (const uint32_t *)Blob.data();
F.LocalNumSLocEntries = Record[0];
unsigned SLocSpaceSize = Record[1];
- F.SLocEntryOffsetsBase = Record[2];
+ F.SLocEntryOffsetsBase = Record[2] + F.ASTBlockBitOffset;
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
SLocSpaceSize);
@@ -3696,7 +3696,7 @@
F.MacroOffsets = (const uint32_t *)Blob.data();
F.LocalNumMacros = Record[0];
unsigned LocalBaseMacroID = Record[1];
- F.MacroOffsetsBase = Record[2];
+ F.MacroOffsetsBase = Record[2] + F.ASTBlockBitOffset;
F.BaseMacroID = getTotalNumMacros();
if (F.LocalNumMacros > 0) {
@@ -3837,17 +3837,18 @@
while (Data < DataEnd) {
// FIXME: Looking up dependency modules by filename is horrible. Let's
- // start fixing this with prebuilt and explicit modules and see how it
- // goes...
+ // start fixing this with prebuilt, explicit and implicit modules and see
+ // how it goes...
using namespace llvm::support;
ModuleKind Kind = static_cast<ModuleKind>(
endian::readNext<uint8_t, little, unaligned>(Data));
uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
StringRef Name = StringRef((const char*)Data, Len);
Data += Len;
- ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule
- ? ModuleMgr.lookupByModuleName(Name)
- : ModuleMgr.lookupByFileName(Name));
+ ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
+ Kind == MK_ImplicitModule
+ ? ModuleMgr.lookupByModuleName(Name)
+ : ModuleMgr.lookupByFileName(Name));
if (!OM) {
std::string Msg =
"SourceLocation remap refers to unknown module, cannot find ";
@@ -4552,6 +4553,7 @@
// This is used for compatibility with older PCH formats.
bool HaveReadControlBlock = false;
while (true) {
+ const uint64_t CurrentStreamOffset = Stream.GetCurrentBitNo();
Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
if (!MaybeEntry) {
Error(MaybeEntry.takeError());
@@ -4606,6 +4608,7 @@
return VersionMismatch;
}
+ F.ASTBlockBitOffset = CurrentStreamOffset;
// Record that we've loaded this module.
Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc));
ShouldFinalizePCM = true;
@@ -4736,6 +4739,10 @@
if (F)
std::copy(Record.begin(), Record.end(), F->Signature.data());
break;
+ case AST_BLOCK_HASH:
+ if (F)
+ std::copy(Record.begin(), Record.end(), F->ASTBlockHash.data());
+ break;
case DIAGNOSTIC_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
if (Listener && ValidateDiagnosticOptions &&
@@ -6350,7 +6357,8 @@
assert(I != GlobalTypeMap.end() && "Corrupted global type map");
ModuleFile *M = I->second;
return RecordLocation(
- M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset());
+ M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() +
+ M->ASTBlockBitOffset);
}
static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) {
Index: clang/include/clang/Serialization/ModuleFile.h
===================================================================
--- clang/include/clang/Serialization/ModuleFile.h
+++ clang/include/clang/Serialization/ModuleFile.h
@@ -168,6 +168,10 @@
/// and modification time to identify this particular file.
ASTFileSignature Signature;
+ /// The signature of the AST block of the module file, this can be used to
+ /// unique module files based on AST contents.
+ ASTFileSignature ASTBlockHash;
+
/// Whether this module has been directly imported by the
/// user.
bool DirectlyImported = false;
@@ -185,6 +189,9 @@
/// The global bit offset (or base) of this module
uint64_t GlobalBitOffset = 0;
+ /// The bit offset of the AST block of this module
+ uint64_t ASTBlockBitOffset = 0;
+
/// The serialized bitstream data for this file.
StringRef Data;
Index: clang/include/clang/Serialization/ASTWriter.h
===================================================================
--- clang/include/clang/Serialization/ASTWriter.h
+++ clang/include/clang/Serialization/ASTWriter.h
@@ -27,6 +27,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -441,7 +442,9 @@
/// A list of the module file extension writers.
std::vector<std::unique_ptr<ModuleFileExtensionWriter>>
- ModuleFileExtensionWriters;
+ ModuleFileExtensionWriters;
+
+ std::pair<uint64_t, uint64_t> ASTBlockRange;
/// Retrieve or create a submodule ID for this module.
unsigned getSubmoduleID(Module *Mod);
@@ -458,7 +461,8 @@
ASTContext &Context);
/// Calculate hash of the pcm content.
- static ASTFileSignature createSignature(StringRef Bytes);
+ static std::pair<ASTFileSignature, ASTFileSignature>
+ createSignature(StringRef AllBytes, StringRef ASTBlockBytes);
void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts,
bool Modules);
Index: clang/include/clang/Serialization/ASTBitCodes.h
===================================================================
--- clang/include/clang/Serialization/ASTBitCodes.h
+++ clang/include/clang/Serialization/ASTBitCodes.h
@@ -41,7 +41,7 @@
/// Version 4 of AST files also requires that the version control branch and
/// revision match exactly, since there is no backward compatibility of
/// AST files at this time.
- const unsigned VERSION_MAJOR = 10;
+ const unsigned VERSION_MAJOR = 11;
/// AST file minor version number supported by this version of
/// Clang.
@@ -242,14 +242,16 @@
/// Raw source location.
unsigned Loc = 0;
- /// Offset in the AST file. Keep structure alignment 32-bit and avoid
- /// padding gap because undefined value in the padding affects AST hash.
+ /// Offset relative to the start of the AST block. Keep structure
+ /// alignment 32-bit and avoid padding gap because undefined value in the
+ /// padding affects AST hash.
UnderalignedInt64 BitOffset;
DeclOffset() = default;
- DeclOffset(SourceLocation Loc, uint64_t BitOffset) {
+ DeclOffset(SourceLocation Loc, uint64_t BitOffset,
+ uint64_t ASTBlockBitOffset) {
setLocation(Loc);
- setBitOffset(BitOffset);
+ setBitOffset(BitOffset, ASTBlockBitOffset);
}
void setLocation(SourceLocation L) {
@@ -260,12 +262,12 @@
return SourceLocation::getFromRawEncoding(Loc);
}
- void setBitOffset(uint64_t Offset) {
- BitOffset.setBitOffset(Offset);
+ void setBitOffset(uint64_t Offset, const uint64_t ASTBlockBitOffset) {
+ BitOffset.setBitOffset(Offset - ASTBlockBitOffset);
}
- uint64_t getBitOffset() const {
- return BitOffset.getBitOffset();
+ uint64_t getBitOffset(const uint64_t ASTBlockBitOffset) const {
+ return BitOffset.getBitOffset() + ASTBlockBitOffset;
}
};
@@ -394,6 +396,9 @@
/// Record code for the signature that identifiers this AST file.
SIGNATURE = 1,
+ /// Record code for the content hash of the AST block.
+ AST_BLOCK_HASH,
+
/// Record code for the diagnostic options table.
DIAGNOSTIC_OPTIONS,
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits