Author: Alex Lorenz Date: 2022-02-11T15:53:16-08:00 New Revision: 3f05192c4c40bc79b1db431a7a36baaa9491eebb
URL: https://github.com/llvm/llvm-project/commit/3f05192c4c40bc79b1db431a7a36baaa9491eebb DIFF: https://github.com/llvm/llvm-project/commit/3f05192c4c40bc79b1db431a7a36baaa9491eebb.diff LOG: Revert "[Preprocessor] Reduce the memory overhead of `#define` directives" This reverts commit 0d9b91524ea4db3760791bba15773c386a26d8ec. This change broke LLDB's build. I will need to recommit after fixing LLDB. Added: Modified: clang/include/clang/Lex/MacroInfo.h clang/lib/Lex/MacroInfo.cpp clang/lib/Lex/PPDirectives.cpp clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTWriter.cpp clang/unittests/Lex/CMakeLists.txt Removed: clang/unittests/Lex/PPMemoryAllocationsTest.cpp ################################################################################ diff --git a/clang/include/clang/Lex/MacroInfo.h b/clang/include/clang/Lex/MacroInfo.h index 1947bc8fc509e..0347a7a37186b 100644 --- a/clang/include/clang/Lex/MacroInfo.h +++ b/clang/include/clang/Lex/MacroInfo.h @@ -54,14 +54,11 @@ class MacroInfo { /// macro, this includes the \c __VA_ARGS__ identifier on the list. IdentifierInfo **ParameterList = nullptr; - /// This is the list of tokens that the macro is defined to. - const Token *ReplacementTokens = nullptr; - /// \see ParameterList unsigned NumParameters = 0; - /// \see ReplacementTokens - unsigned NumReplacementTokens = 0; + /// This is the list of tokens that the macro is defined to. + SmallVector<Token, 8> ReplacementTokens; /// Length in characters of the macro definition. mutable unsigned DefinitionLength; @@ -233,47 +230,26 @@ class MacroInfo { bool isWarnIfUnused() const { return IsWarnIfUnused; } /// Return the number of tokens that this macro expands to. - unsigned getNumTokens() const { return NumReplacementTokens; } + unsigned getNumTokens() const { return ReplacementTokens.size(); } const Token &getReplacementToken(unsigned Tok) const { - assert(Tok < NumReplacementTokens && "Invalid token #"); + assert(Tok < ReplacementTokens.size() && "Invalid token #"); return ReplacementTokens[Tok]; } - using const_tokens_iterator = const Token *; + using tokens_iterator = SmallVectorImpl<Token>::const_iterator; - const_tokens_iterator tokens_begin() const { return ReplacementTokens; } - const_tokens_iterator tokens_end() const { - return ReplacementTokens + NumReplacementTokens; - } - bool tokens_empty() const { return NumReplacementTokens == 0; } - ArrayRef<Token> tokens() const { - return llvm::makeArrayRef(ReplacementTokens, NumReplacementTokens); - } + tokens_iterator tokens_begin() const { return ReplacementTokens.begin(); } + tokens_iterator tokens_end() const { return ReplacementTokens.end(); } + bool tokens_empty() const { return ReplacementTokens.empty(); } + ArrayRef<Token> tokens() const { return ReplacementTokens; } - llvm::MutableArrayRef<Token> - allocateTokens(unsigned NumTokens, llvm::BumpPtrAllocator &PPAllocator) { - assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 && - "Token list already allocated!"); - NumReplacementTokens = NumTokens; - Token *NewReplacementTokens = PPAllocator.Allocate<Token>(NumTokens); - ReplacementTokens = NewReplacementTokens; - return llvm::makeMutableArrayRef(NewReplacementTokens, NumTokens); - } - - void setTokens(ArrayRef<Token> Tokens, llvm::BumpPtrAllocator &PPAllocator) { + /// Add the specified token to the replacement text for the macro. + void AddTokenToBody(const Token &Tok) { assert( !IsDefinitionLengthCached && "Changing replacement tokens after definition length got calculated"); - assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 && - "Token list already set!"); - if (Tokens.empty()) - return; - - NumReplacementTokens = Tokens.size(); - Token *NewReplacementTokens = PPAllocator.Allocate<Token>(Tokens.size()); - std::copy(Tokens.begin(), Tokens.end(), NewReplacementTokens); - ReplacementTokens = NewReplacementTokens; + ReplacementTokens.push_back(Tok); } /// Return true if this macro is enabled. diff --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp index f5702130d1819..1ccd140364aeb 100644 --- a/clang/lib/Lex/MacroInfo.cpp +++ b/clang/lib/Lex/MacroInfo.cpp @@ -28,25 +28,6 @@ using namespace clang; -namespace { - -// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer -// and 4 byte SourceLocation. -template <int> class MacroInfoSizeChecker { -public: - constexpr static bool AsExpected = true; -}; -template <> class MacroInfoSizeChecker<8> { -public: - constexpr static bool AsExpected = - sizeof(MacroInfo) == (32 + sizeof(SourceLocation) * 2); -}; - -static_assert(MacroInfoSizeChecker<sizeof(void *)>::AsExpected, - "Unexpected size of MacroInfo"); - -} // end namespace - MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false), IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false), @@ -58,7 +39,6 @@ unsigned MacroInfo::getDefinitionLengthSlow(const SourceManager &SM) const { assert(!IsDefinitionLengthCached); IsDefinitionLengthCached = true; - ArrayRef<Token> ReplacementTokens = tokens(); if (ReplacementTokens.empty()) return (DefinitionLength = 0); @@ -96,7 +76,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP, bool Lexically = !Syntactically; // Check # tokens in replacement, number of args, and various flags all match. - if (getNumTokens() != Other.getNumTokens() || + if (ReplacementTokens.size() != Other.ReplacementTokens.size() || getNumParams() != Other.getNumParams() || isFunctionLike() != Other.isFunctionLike() || isC99Varargs() != Other.isC99Varargs() || @@ -112,7 +92,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP, } // Check all the tokens. - for (unsigned i = 0; i != NumReplacementTokens; ++i) { + for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) { const Token &A = ReplacementTokens[i]; const Token &B = Other.ReplacementTokens[i]; if (A.getKind() != B.getKind()) @@ -177,7 +157,7 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const { } bool First = true; - for (const Token &Tok : tokens()) { + for (const Token &Tok : ReplacementTokens) { // Leading space is semantically meaningful in a macro definition, // so preserve it in the dump output. if (First || Tok.hasLeadingSpace()) diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index ae5e4fc75c157..f3aefdd22b514 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -2711,14 +2711,12 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( if (!Tok.is(tok::eod)) LastTok = Tok; - SmallVector<Token, 16> Tokens; - // Read the rest of the macro body. if (MI->isObjectLike()) { // Object-like macros are very simple, just read their body. while (Tok.isNot(tok::eod)) { LastTok = Tok; - Tokens.push_back(Tok); + MI->AddTokenToBody(Tok); // Get the next token of the macro. LexUnexpandedToken(Tok); } @@ -2733,7 +2731,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( LastTok = Tok; if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) { - Tokens.push_back(Tok); + MI->AddTokenToBody(Tok); if (VAOCtx.isVAOptToken(Tok)) { // If we're already within a VAOPT, emit an error. @@ -2747,7 +2745,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use); return nullptr; } - Tokens.push_back(Tok); + MI->AddTokenToBody(Tok); VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation()); LexUnexpandedToken(Tok); if (Tok.is(tok::hashhash)) { @@ -2758,10 +2756,10 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( } else if (VAOCtx.isInVAOpt()) { if (Tok.is(tok::r_paren)) { if (VAOCtx.sawClosingParen()) { - assert(Tokens.size() >= 3 && - "Must have seen at least __VA_OPT__( " - "and a subsequent tok::r_paren"); - if (Tokens[Tokens.size() - 2].is(tok::hashhash)) { + const unsigned NumTokens = MI->getNumTokens(); + assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( " + "and a subsequent tok::r_paren"); + if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) { Diag(Tok, diag::err_vaopt_paste_at_end); return nullptr; } @@ -2780,7 +2778,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( // things. if (getLangOpts().TraditionalCPP) { Tok.setKind(tok::unknown); - Tokens.push_back(Tok); + MI->AddTokenToBody(Tok); // Get the next token of the macro. LexUnexpandedToken(Tok); @@ -2796,16 +2794,17 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( LexUnexpandedToken(Tok); if (Tok.is(tok::eod)) { - Tokens.push_back(LastTok); + MI->AddTokenToBody(LastTok); break; } - if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ && - Tokens[Tokens.size() - 1].is(tok::comma)) + unsigned NumTokens = MI->getNumTokens(); + if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ && + MI->getReplacementToken(NumTokens-1).is(tok::comma)) MI->setHasCommaPasting(); // Things look ok, add the '##' token to the macro. - Tokens.push_back(LastTok); + MI->AddTokenToBody(LastTok); continue; } @@ -2824,7 +2823,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( // confused. if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) { LastTok.setKind(tok::unknown); - Tokens.push_back(LastTok); + MI->AddTokenToBody(LastTok); continue; } else { Diag(Tok, diag::err_pp_stringize_not_parameter) @@ -2834,13 +2833,13 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( } // Things look ok, add the '#' and param name tokens to the macro. - Tokens.push_back(LastTok); + MI->AddTokenToBody(LastTok); // If the token following '#' is VAOPT, let the next iteration handle it // and check it for correctness, otherwise add the token and prime the // loop with the next one. if (!VAOCtx.isVAOptToken(Tok)) { - Tokens.push_back(Tok); + MI->AddTokenToBody(Tok); LastTok = Tok; // Get the next token of the macro. @@ -2856,8 +2855,6 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( } } MI->setDefinitionEndLoc(LastTok.getLocation()); - - MI->setTokens(Tokens, BP); return MI; } /// HandleDefineDirective - Implements \#define. This consumes the entire macro @@ -3010,7 +3007,7 @@ void Preprocessor::HandleDefineDirective( Tok.startToken(); Tok.setKind(tok::kw__Static_assert); Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert")); - MI->setTokens({Tok}, BP); + MI->AddTokenToBody(Tok); (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI); } } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 5a09d802b2d8a..0ea904b275f25 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1694,7 +1694,6 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { RecordData Record; SmallVector<IdentifierInfo*, 16> MacroParams; MacroInfo *Macro = nullptr; - llvm::MutableArrayRef<Token> MacroTokens; while (true) { // Advance to the next record, but if we get to the end of the block, don't @@ -1749,8 +1748,7 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { MI->setDefinitionEndLoc(ReadSourceLocation(F, Record, NextIndex)); MI->setIsUsed(Record[NextIndex++]); MI->setUsedForHeaderGuard(Record[NextIndex++]); - MacroTokens = MI->allocateTokens(Record[NextIndex++], - PP.getPreprocessorAllocator()); + if (RecType == PP_MACRO_FUNCTION_LIKE) { // Decode function-like macro info. bool isC99VarArgs = Record[NextIndex++]; @@ -1795,14 +1793,10 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) { // If we see a TOKEN before a PP_MACRO_*, then the file is // erroneous, just pretend we didn't see this. if (!Macro) break; - if (MacroTokens.empty()) { - Error("unexpected number of macro tokens for a macro in AST file"); - return Macro; - } unsigned Idx = 0; - MacroTokens[0] = ReadToken(F, Record, Idx); - MacroTokens = MacroTokens.drop_front(); + Token Tok = ReadToken(F, Record, Idx); + Macro->AddTokenToBody(Tok); break; } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a126e12bcbd99..763fc9537c04b 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2431,7 +2431,6 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { AddSourceLocation(MI->getDefinitionEndLoc(), Record); Record.push_back(MI->isUsed()); Record.push_back(MI->isUsedForHeaderGuard()); - Record.push_back(MI->getNumTokens()); unsigned Code; if (MI->isObjectLike()) { Code = PP_MACRO_OBJECT_LIKE; diff --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt index 99024ba896ac8..97a4e5e44608c 100644 --- a/clang/unittests/Lex/CMakeLists.txt +++ b/clang/unittests/Lex/CMakeLists.txt @@ -9,7 +9,6 @@ add_clang_unittest(LexTests LexerTest.cpp PPCallbacksTest.cpp PPConditionalDirectiveRecordTest.cpp - PPMemoryAllocationsTest.cpp ) clang_target_link_libraries(LexTests diff --git a/clang/unittests/Lex/PPMemoryAllocationsTest.cpp b/clang/unittests/Lex/PPMemoryAllocationsTest.cpp deleted file mode 100644 index 894d94e149a9f..0000000000000 --- a/clang/unittests/Lex/PPMemoryAllocationsTest.cpp +++ /dev/null @@ -1,97 +0,0 @@ -//===- unittests/Lex/PPMemoryAllocationsTest.cpp - ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===--------------------------------------------------------------===// - -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TargetInfo.h" -#include "clang/Basic/TargetOptions.h" -#include "clang/Lex/HeaderSearch.h" -#include "clang/Lex/HeaderSearchOptions.h" -#include "clang/Lex/ModuleLoader.h" -#include "clang/Lex/Preprocessor.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "gtest/gtest.h" - -using namespace clang; - -namespace { - -class PPMemoryAllocationsTest : public ::testing::Test { -protected: - PPMemoryAllocationsTest() - : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), - Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), - SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) { - TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; - Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts); - } - - FileSystemOptions FileMgrOpts; - FileManager FileMgr; - IntrusiveRefCntPtr<DiagnosticIDs> DiagID; - DiagnosticsEngine Diags; - SourceManager SourceMgr; - LangOptions LangOpts; - std::shared_ptr<TargetOptions> TargetOpts; - IntrusiveRefCntPtr<TargetInfo> Target; -}; - -TEST_F(PPMemoryAllocationsTest, PPMacroDefinesAllocations) { - std::string Source; - size_t NumMacros = 1000000; - { - llvm::raw_string_ostream SourceOS(Source); - - // Create a combination of 1 or 3 token macros. - for (size_t I = 0; I < NumMacros; ++I) { - SourceOS << "#define MACRO_ID_" << I << " "; - if ((I % 2) == 0) - SourceOS << "(" << I << ")"; - else - SourceOS << I; - SourceOS << "\n"; - } - } - - std::unique_ptr<llvm::MemoryBuffer> Buf = - llvm::MemoryBuffer::getMemBuffer(Source); - SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); - - TrivialModuleLoader ModLoader; - HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr, - Diags, LangOpts, Target.get()); - Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts, - SourceMgr, HeaderInfo, ModLoader, - /*IILookup =*/nullptr, - /*OwnsHeaderSearch =*/false); - PP.Initialize(*Target); - PP.EnterMainSourceFile(); - - while (1) { - Token tok; - PP.Lex(tok); - if (tok.is(tok::eof)) - break; - } - - size_t NumAllocated = PP.getPreprocessorAllocator().getBytesAllocated(); - float BytesPerDefine = float(NumAllocated) / float(NumMacros); - llvm::errs() << "Num preprocessor allocations for " << NumMacros - << " #define: " << NumAllocated << "\n"; - llvm::errs() << "Bytes per #define: " << BytesPerDefine << "\n"; - // On arm64-apple-macos, we get around 120 bytes per define. - // Assume a reasonable upper bound based on that number that we don't want - // to exceed when storing information about a macro #define with 1 or 3 - // tokens. - EXPECT_LT(BytesPerDefine, 130.0f); -} - -} // anonymous namespace _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits