llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-modules Author: Haojian Wu (hokein) <details> <summary>Changes</summary> See the discussion in https://github.com/llvm/llvm-project/pull/145529. This will slightly increase the PCM size (~5%). Some data (in-memory preamble size in clangd): - SemaExpr.cpp: 77MB -> 80MB - FindTarget.cpp: 71MB -> 75MB --- Patch is 22.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145670.diff 8 Files Affected: - (modified) clang/include/clang/Serialization/ASTReader.h (+8-12) - (modified) clang/include/clang/Serialization/ASTRecordReader.h (+5-6) - (modified) clang/include/clang/Serialization/ASTRecordWriter.h (+5-6) - (modified) clang/include/clang/Serialization/ASTWriter.h (+3-7) - (modified) clang/include/clang/Serialization/SourceLocationEncoding.h (+6-108) - (modified) clang/lib/Serialization/ASTReader.cpp (+11-17) - (modified) clang/lib/Serialization/ASTWriter.cpp (+15-25) - (modified) clang/unittests/Serialization/SourceLocationEncodingTest.cpp (-57) ``````````diff diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 7a4b7d21bb20e..7d4b4467eb97d 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -464,8 +464,6 @@ class ASTReader using ModuleReverseIterator = ModuleManager::ModuleReverseIterator; private: - using LocSeq = SourceLocationSequence; - /// The receiver of some callbacks invoked by ASTReader. std::unique_ptr<ASTReaderListener> Listener; @@ -2445,18 +2443,16 @@ class ASTReader /// Read a source location from raw form and return it in its /// originating module file's source location space. 
std::pair<SourceLocation, unsigned> - ReadUntranslatedSourceLocation(RawLocEncoding Raw, - LocSeq *Seq = nullptr) const { - return SourceLocationEncoding::decode(Raw, Seq); + ReadUntranslatedSourceLocation(RawLocEncoding Raw) const { + return SourceLocationEncoding::decode(Raw); } /// Read a source location from raw form. - SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw, - LocSeq *Seq = nullptr) const { + SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw) const { if (!MF.ModuleOffsetMap.empty()) ReadModuleOffsetMap(MF); - auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq); + auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw); ModuleFile *OwningModuleFile = ModuleFileIndex == 0 ? &MF : MF.TransitiveImports[ModuleFileIndex - 1]; @@ -2484,9 +2480,9 @@ class ASTReader /// Read a source location. SourceLocation ReadSourceLocation(ModuleFile &ModuleFile, - const RecordDataImpl &Record, unsigned &Idx, - LocSeq *Seq = nullptr) { - return ReadSourceLocation(ModuleFile, Record[Idx++], Seq); + const RecordDataImpl &Record, + unsigned &Idx) { + return ReadSourceLocation(ModuleFile, Record[Idx++]); } /// Read a FileID. @@ -2505,7 +2501,7 @@ class ASTReader /// Read a source range. 
SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record, - unsigned &Idx, LocSeq *Seq = nullptr); + unsigned &Idx); static llvm::BitVector ReadBitVector(const RecordData &Record, const StringRef Blob); diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h index da3f504ff27df..1472497ff5e7e 100644 --- a/clang/include/clang/Serialization/ASTRecordReader.h +++ b/clang/include/clang/Serialization/ASTRecordReader.h @@ -32,7 +32,6 @@ class OMPChildren; class ASTRecordReader : public serialization::DataStreamBasicReader<ASTRecordReader> { using ModuleFile = serialization::ModuleFile; - using LocSeq = SourceLocationSequence; ASTReader *Reader; ModuleFile *F; @@ -160,7 +159,7 @@ class ASTRecordReader TypeSourceInfo *readTypeSourceInfo(); /// Reads the location information for a type. - void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr); + void readTypeLoc(TypeLoc TL); /// Map a local type ID within a given AST file to a global type ID. serialization::TypeID getGlobalTypeID(serialization::TypeID LocalID) const { @@ -287,13 +286,13 @@ class ASTRecordReader void readOpenACCRoutineDeclAttr(OpenACCRoutineDeclAttr *A); /// Read a source location, advancing Idx. - SourceLocation readSourceLocation(LocSeq *Seq = nullptr) { - return Reader->ReadSourceLocation(*F, Record, Idx, Seq); + SourceLocation readSourceLocation() { + return Reader->ReadSourceLocation(*F, Record, Idx); } /// Read a source range, advancing Idx. - SourceRange readSourceRange(LocSeq *Seq = nullptr) { - return Reader->ReadSourceRange(*F, Record, Idx, Seq); + SourceRange readSourceRange() { + return Reader->ReadSourceRange(*F, Record, Idx); } /// Read an arbitrary constant value, advancing Idx. 
diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h index 964c9e6ea8a25..ee005ec287708 100644 --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -29,7 +29,6 @@ class TypeLoc; /// An object for streaming information to a record. class ASTRecordWriter : public serialization::DataStreamBasicWriter<ASTRecordWriter> { - using LocSeq = SourceLocationSequence; ASTWriter *Writer; ASTWriter::RecordDataImpl *Record; @@ -147,8 +146,8 @@ class ASTRecordWriter void AddFunctionDefinition(const FunctionDecl *FD); /// Emit a source location. - void AddSourceLocation(SourceLocation Loc, LocSeq *Seq = nullptr) { - return Writer->AddSourceLocation(Loc, *Record, Seq); + void AddSourceLocation(SourceLocation Loc) { + return Writer->AddSourceLocation(Loc, *Record); } void writeSourceLocation(SourceLocation Loc) { AddSourceLocation(Loc); @@ -174,8 +173,8 @@ class ASTRecordWriter } /// Emit a source range. - void AddSourceRange(SourceRange Range, LocSeq *Seq = nullptr) { - return Writer->AddSourceRange(Range, *Record, Seq); + void AddSourceRange(SourceRange Range) { + return Writer->AddSourceRange(Range, *Record); } void writeBool(bool Value) { @@ -245,7 +244,7 @@ class ASTRecordWriter void AddTypeSourceInfo(TypeSourceInfo *TInfo); /// Emits source location information for a type. Does not emit the type. - void AddTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr); + void AddTypeLoc(TypeLoc TL); /// Emits a template argument location info. 
void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind, diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 97679ace8b610..013d613eb2b98 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -115,8 +115,6 @@ class ASTWriter : public ASTDeserializationListener, using TypeIdxMap = llvm::DenseMap<QualType, serialization::TypeIdx, serialization::UnsafeQualTypeDenseMapInfo>; - using LocSeq = SourceLocationSequence; - /// The bitstream writer used to emit this precompiled header. llvm::BitstreamWriter &Stream; @@ -733,16 +731,14 @@ class ASTWriter : public ASTDeserializationListener, void AddFileID(FileID FID, RecordDataImpl &Record); /// Emit a source location. - void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, - LocSeq *Seq = nullptr); + void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record); /// Return the raw encodings for source locations. SourceLocationEncoding::RawLocEncoding - getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr); + getRawSourceLocationEncoding(SourceLocation loc); /// Emit a source range. - void AddSourceRange(SourceRange Range, RecordDataImpl &Record, - LocSeq *Seq = nullptr); + void AddSourceRange(SourceRange Range, RecordDataImpl &Record); /// Emit a reference to an identifier. void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h index 33ca1728fa479..4a068bbf3fd8a 100644 --- a/clang/include/clang/Serialization/SourceLocationEncoding.h +++ b/clang/include/clang/Serialization/SourceLocationEncoding.h @@ -25,8 +25,6 @@ // * C: The macro bit. We rotate it to the lowest bit so that we can save some // space in case the index of the module file is 0. 
// -// Specially, if the index of the module file is 0, we allow to encode a -// sequence of locations we store only differences between successive elements. // //===----------------------------------------------------------------------===// @@ -38,7 +36,6 @@ #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H namespace clang { -class SourceLocationSequence; /// Serialized encoding of SourceLocations without context. /// Optimized to have small unsigned values (=> small after VBR encoding). @@ -54,119 +51,22 @@ class SourceLocationEncoding { static UIntTy decodeRaw(UIntTy Raw) { return (Raw >> 1) | (Raw << (UIntBits - 1)); } - friend SourceLocationSequence; public: using RawLocEncoding = uint64_t; static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset, - unsigned BaseModuleFileIndex, - SourceLocationSequence * = nullptr); - static std::pair<SourceLocation, unsigned> - decode(RawLocEncoding, SourceLocationSequence * = nullptr); -}; - -/// Serialized encoding of a sequence of SourceLocations. -/// -/// Optimized to produce small values when locations with the sequence are -/// similar. Each element can be delta-encoded against the last nonzero element. -/// -/// Sequences should be started by creating a SourceLocationSequence::State, -/// and then passed around as SourceLocationSequence*. 
Example: -/// -/// // establishes a sequence -/// void EmitTopLevelThing() { -/// SourceLocationSequence::State Seq; -/// EmitContainedThing(Seq); -/// EmitRecursiveThing(Seq); -/// } -/// -/// // optionally part of a sequence -/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { -/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); -/// } -/// -/// // establishes a sequence if there isn't one already -/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { -/// SourceLocationSequence::State Seq(ParentSeq); -/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); -/// EmitRecursiveThing(Seq); -/// } -/// -class SourceLocationSequence { - using UIntTy = SourceLocation::UIntTy; - using EncodedTy = uint64_t; - constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; - static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); - - // Prev stores the rotated last nonzero location. - UIntTy &Prev; - - // Zig-zag encoding turns small signed integers into small unsigned integers. - // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... - static UIntTy zigZag(UIntTy V) { - UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); - return Sign ^ (V << 1); - } - static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } - - SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} - - EncodedTy encodeRaw(UIntTy Raw) { - if (Raw == 0) - return 0; - UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); - if (Prev == 0) - return Prev = Rotated; - UIntTy Delta = Rotated - Prev; - Prev = Rotated; - // Exactly one 33 bit value is possible! (1 << 32). - // This is because we have two representations of zero: trivial & relative. 
- return 1 + EncodedTy{zigZag(Delta)}; - } - UIntTy decodeRaw(EncodedTy Encoded) { - if (Encoded == 0) - return 0; - if (Prev == 0) - return SourceLocationEncoding::decodeRaw(Prev = Encoded); - return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); - } - -public: - SourceLocation decode(EncodedTy Encoded) { - return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); - } - EncodedTy encode(SourceLocation Loc) { - return encodeRaw(Loc.getRawEncoding()); - } - - class State; -}; - -/// This object establishes a SourceLocationSequence. -class SourceLocationSequence::State { - UIntTy Prev = 0; - SourceLocationSequence Seq; - -public: - // If Parent is provided and non-null, then this root becomes part of that - // enclosing sequence instead of establishing a new one. - State(SourceLocationSequence *Parent = nullptr) - : Seq(Parent ? Parent->Prev : Prev) {} - - // Implicit conversion for uniform use of roots vs propagated sequences. - operator SourceLocationSequence *() { return &Seq; } + unsigned BaseModuleFileIndex); + static std::pair<SourceLocation, unsigned> decode(RawLocEncoding); }; inline SourceLocationEncoding::RawLocEncoding SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset, - unsigned BaseModuleFileIndex, - SourceLocationSequence *Seq) { + unsigned BaseModuleFileIndex) { // If the source location is a local source location, we can try to optimize // the similar sequences to only record the differences. if (!BaseOffset) - return Seq ? 
Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); - + return encodeRaw(Loc.getRawEncoding()); if (Loc.isInvalid()) return 0; @@ -183,13 +83,11 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset, return Encoded; } inline std::pair<SourceLocation, unsigned> -SourceLocationEncoding::decode(RawLocEncoding Encoded, - SourceLocationSequence *Seq) { +SourceLocationEncoding::decode(RawLocEncoding Encoded) { unsigned ModuleFileIndex = Encoded >> 32; if (!ModuleFileIndex) - return {Seq ? Seq->decode(Encoded) - : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)), + return {SourceLocation::getFromRawEncoding(decodeRaw(Encoded)), ModuleFileIndex}; Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32); diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index b696cb2efee3d..7250660dc74a2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1914,10 +1914,9 @@ bool ASTReader::ReadSLocEntry(int ID) { } case SM_SLOC_EXPANSION_ENTRY: { - LocSeq::State Seq; - SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq); - SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq); - SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq); + SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]); + SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2]); + SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3]); SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd, Record[5], Record[4], ID, BaseOffset + Record[0]); @@ -7072,13 +7071,10 @@ QualType ASTReader::readTypeRecord(TypeID ID) { namespace clang { class TypeLocReader : public TypeLocVisitor<TypeLocReader> { - using LocSeq = SourceLocationSequence; - ASTRecordReader &Reader; - LocSeq *Seq; - SourceLocation readSourceLocation() { return Reader.readSourceLocation(Seq); } - SourceRange readSourceRange() { return Reader.readSourceRange(Seq); } + 
SourceLocation readSourceLocation() { return Reader.readSourceLocation(); } + SourceRange readSourceRange() { return Reader.readSourceRange(); } TypeSourceInfo *GetTypeSourceInfo() { return Reader.readTypeSourceInfo(); @@ -7093,8 +7089,7 @@ class TypeLocReader : public TypeLocVisitor<TypeLocReader> { } public: - TypeLocReader(ASTRecordReader &Reader, LocSeq *Seq) - : Reader(Reader), Seq(Seq) {} + TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {} // We want compile-time assurance that we've enumerated all of // these, so unfortunately we have to declare them first, then @@ -7458,9 +7453,8 @@ void TypeLocReader::VisitDependentBitIntTypeLoc( TL.setNameLoc(readSourceLocation()); } -void ASTRecordReader::readTypeLoc(TypeLoc TL, LocSeq *ParentSeq) { - LocSeq::State Seq(ParentSeq); - TypeLocReader TLR(*this, Seq); +void ASTRecordReader::readTypeLoc(TypeLoc TL) { + TypeLocReader TLR(*this); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) TLR.Visit(TL); } @@ -10016,9 +10010,9 @@ ASTRecordReader::readNestedNameSpecifierLoc() { } SourceRange ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record, - unsigned &Idx, LocSeq *Seq) { - SourceLocation beg = ReadSourceLocation(F, Record, Idx, Seq); - SourceLocation end = ReadSourceLocation(F, Record, Idx, Seq); + unsigned &Idx) { + SourceLocation beg = ReadSourceLocation(F, Record, Idx); + SourceLocation end = ReadSourceLocation(F, Record, Idx); return SourceRange(beg, end); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 4cca214f8e308..04cbd1ca552b7 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -330,19 +330,13 @@ class ASTTypeWriter { }; class TypeLocWriter : public TypeLocVisitor<TypeLocWriter> { - using LocSeq = SourceLocationSequence; - ASTRecordWriter &Record; - LocSeq *Seq; - void addSourceLocation(SourceLocation Loc) { - Record.AddSourceLocation(Loc, Seq); - } - void addSourceRange(SourceRange Range) { 
Record.AddSourceRange(Range, Seq); } + void addSourceLocation(SourceLocation Loc) { Record.AddSourceLocation(Loc); } + void addSourceRange(SourceRange Range) { Record.AddSourceRange(Range); } public: - TypeLocWriter(ASTRecordWriter &Record, LocSeq *Seq) - : Record(Record), Seq(Seq) {} + TypeLocWriter(ASTRecordWriter &Record) : Record(Record) {} #define ABSTRACT_TYPELOC(CLASS, PARENT) #define TYPELOC(CLASS, PARENT) \ @@ -2449,13 +2443,12 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr) { SLocEntryOffsets.push_back(Offset); // Starting offset of this entry within this module, so skip the dummy. Record.push_back(getAdjustedOffset(SLoc->getOffset()) - 2); - LocSeq::State Seq; - AddSourceLocation(Expansion.getSpellingLoc(), Record, Seq); - AddSourceLocation(Expansion.getExpansionLocStart(), Record, Seq); + AddSourceLocation(Expansion.getSpellingLoc(), Record); + AddSourceLocation(Expansion.getExpansionLocStart(), Record); AddSourceLocation(Expansion.isMacroArgExpansion() ? SourceLocation() : Expansion.getExpansionLocEnd(), - Record, Seq); + Record); Record.push_back(Expansion.isExpansionTokenRange()); // Compute the token length for this macro expansion. 
@@ -6653,7 +6646,7 @@ void ASTWriter::AddFileID(FileID FID, RecordDataImpl &Record) { } SourceLocationEncoding::RawLocEncoding -ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { +ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc) { unsigned BaseOffset = 0; unsigned ModuleFileIndex = 0; @@ -6672,19 +6665,17 @@ ASTWriter::getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq) { assert(&getChain()->getModuleManager()[F->Index] == F); } - return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex, Seq); + return SourceLocationEncoding::encode(Loc, BaseOffset, ModuleFileIndex); } -void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, - SourceLocationSequence *Seq) { +void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record) { Loc = getAdjustedLocation(Loc); - Record.push_back(getRawSourceLocationEncoding(Loc, Seq)); + Record.push_back(getRawSourceLocationEncoding(Loc)); } -void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record, - SourceLocationSequence *Seq) { - AddSourceLocation(Range.getBegin(), Record, Seq); - AddSourceLocation(Range.getEnd(), Record, Seq); +void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record) { + AddSourceLocation(Range.getBegin(), Record); + AddSourceLocation(Range.getEnd(), Record); } void ASTRecordWriter::AddAPFloat(const llvm::APFloat &Value) { @@ -6804,9 +6795,8 @@ void ASTRecordWriter::AddTypeSourceInfo(TypeSourceInfo *TInfo) { AddTypeLoc(TInfo->getTypeLoc()); } -void ASTRecordWriter::AddTypeLoc(TypeLoc TL, LocSeq *OuterSeq) { - LocSeq::State Seq(OuterSeq); - TypeLocWriter TLW(*this, Seq); +void ASTRecordWriter::AddTypeLoc(TypeLoc TL) { + TypeLocWriter TLW(*this); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) TLW.Vis... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/145670 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits