https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/70381
>From 3fe63f81fcb999681daa11b2890c82fda3aaeef5 Mon Sep 17 00:00:00 2001 From: serge-sans-paille <sguel...@mozilla.com> Date: Thu, 26 Oct 2023 22:31:43 +0200 Subject: [PATCH 1/2] [clang] Change representation of CurLexerKind Previous representation used an enumeration combined with a switch to dispatch to the appropriate lexer. Use a function pointer so that the dispatching is just an indirect call, which is actually better because lexing is a costly task compared to a function call. This also makes the code slightly cleaner. Speedups on the compile-time tracker are consistent and range from -0.05% to -0.20% for NewPM-O0-g, see https://llvm-compile-time-tracker.com/compare.php?from=f9906508bc4f05d3950e2219b4c56f6c078a61ef&to=608c85ec1283638db949d73e062bcc3355001ce4&stat=instructions:u Considering just the preprocessing task, preprocessing the sqlite amalgamation takes 0.6% fewer instructions (according to valgrind --tool=callgrind) --- clang/include/clang/Lex/Preprocessor.h | 46 +++++++++++----- clang/lib/Lex/PPCaching.cpp | 8 +-- clang/lib/Lex/PPLexerChange.cpp | 20 +++---- clang/lib/Lex/Preprocessor.cpp | 67 ++++++----------------- clang/utils/ClangVisualizers/clang.natvis | 2 +- 5 files changed, 62 insertions(+), 81 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 18d88407ae12c90..634d3924aa2248b 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -751,13 +751,8 @@ class Preprocessor { std::unique_ptr<TokenLexer> CurTokenLexer; /// The kind of lexer we're currently working with. - enum CurLexerKind { - CLK_Lexer, - CLK_TokenLexer, - CLK_CachingLexer, - CLK_DependencyDirectivesLexer, - CLK_LexAfterModuleImport - } CurLexerKind = CLK_Lexer; + typedef bool (*LexerCallback)(Preprocessor &, Token &); + LexerCallback CurLexerCallback = &CLK_Lexer; /// If the current lexer is for a submodule that is being built, this /// is that submodule. 
@@ -767,7 +762,7 @@ class Preprocessor { /// \#included, and macros currently being expanded from, not counting /// CurLexer/CurTokenLexer. struct IncludeStackInfo { - enum CurLexerKind CurLexerKind; + LexerCallback CurLexerCallback; Module *TheSubmodule; std::unique_ptr<Lexer> TheLexer; PreprocessorLexer *ThePPLexer; @@ -776,12 +771,12 @@ class Preprocessor { // The following constructors are completely useless copies of the default // versions, only needed to pacify MSVC. - IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule, + IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule, std::unique_ptr<Lexer> &&TheLexer, PreprocessorLexer *ThePPLexer, std::unique_ptr<TokenLexer> &&TheTokenLexer, ConstSearchDirIterator TheDirLookup) - : CurLexerKind(std::move(CurLexerKind)), + : CurLexerCallback(std::move(CurLexerCallback)), TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)), ThePPLexer(std::move(ThePPLexer)), TheTokenLexer(std::move(TheTokenLexer)), @@ -1901,7 +1896,7 @@ class Preprocessor { /// Determine whether it's possible for a future call to Lex to produce an /// annotation token created by a previous call to EnterAnnotationToken. bool mightHavePendingAnnotationTokens() { - return CurLexerKind != CLK_Lexer; + return CurLexerCallback != CLK_Lexer; } /// Update the current token to represent the provided @@ -1914,7 +1909,7 @@ class Preprocessor { /// Recompute the current lexer kind based on the CurLexer/ /// CurTokenLexer pointers. 
- void recomputeCurLexerKind(); + void recomputeCurLexerCallback(); /// Returns true if incremental processing is enabled bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } @@ -2430,8 +2425,9 @@ class Preprocessor { friend void TokenLexer::ExpandFunctionArguments(); void PushIncludeMacroStack() { - assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer"); - IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule, + assert(CurLexerCallback != CLK_CachingLexer && + "cannot push a caching lexer"); + IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule, std::move(CurLexer), CurPPLexer, std::move(CurTokenLexer), CurDirLookup); CurPPLexer = nullptr; @@ -2443,7 +2439,7 @@ class Preprocessor { CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer); CurDirLookup = IncludeMacroStack.back().TheDirLookup; CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule; - CurLexerKind = IncludeMacroStack.back().CurLexerKind; + CurLexerCallback = IncludeMacroStack.back().CurLexerCallback; IncludeMacroStack.pop_back(); } @@ -2899,6 +2895,26 @@ class Preprocessor { /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage" /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + +private: + /// Helper functions to forward lexing to the actual lexer. They all share the + /// same signature. 
+ static bool CLK_Lexer(Preprocessor &P, Token &Result) { + return P.CurLexer->Lex(Result); + } + static bool CLK_TokenLexer(Preprocessor &P, Token &Result) { + return P.CurTokenLexer->Lex(Result); + } + static bool CLK_CachingLexer(Preprocessor &P, Token &Result) { + P.CachingLex(Result); + return true; + } + static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) { + return P.CurLexer->LexDependencyDirectiveToken(Result); + } + static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) { + return P.LexAfterModuleImport(Result); + } }; /// Abstract base class that describes a handler that will receive diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp index e05e52ba9bb5362..b8219835a588bea 100644 --- a/clang/lib/Lex/PPCaching.cpp +++ b/clang/lib/Lex/PPCaching.cpp @@ -42,7 +42,7 @@ void Preprocessor::Backtrack() { && "EnableBacktrackAtThisPos was not called!"); CachedLexPos = BacktrackPositions.back(); BacktrackPositions.pop_back(); - recomputeCurLexerKind(); + recomputeCurLexerCallback(); } void Preprocessor::CachingLex(Token &Result) { @@ -88,7 +88,7 @@ void Preprocessor::EnterCachingLexMode() { "entered caching lex mode while lexing something else"); if (InCachingLexMode()) { - assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind"); + assert(CurLexerCallback == CLK_CachingLexer && "Unexpected lexer kind"); return; } @@ -96,9 +96,9 @@ void Preprocessor::EnterCachingLexMode() { } void Preprocessor::EnterCachingLexModeUnchecked() { - assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode"); + assert(CurLexerCallback != CLK_CachingLexer && "already in caching lex mode"); PushIncludeMacroStack(); - CurLexerKind = CLK_CachingLexer; + CurLexerCallback = CLK_CachingLexer; } diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index b8575e1adfc5b3f..1378880f8b0e5e0 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -122,10 +122,10 @@ 
void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, CurPPLexer = TheLexer; CurDirLookup = CurDir; CurLexerSubmodule = nullptr; - if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = TheLexer->isDependencyDirectivesLexer() - ? CLK_DependencyDirectivesLexer - : CLK_Lexer; + if (CurLexerCallback != CLK_LexAfterModuleImport) + CurLexerCallback = TheLexer->isDependencyDirectivesLexer() + ? CLK_DependencyDirectivesLexer + : CLK_Lexer; // Notify the client, if desired, that we are in a new source file. if (Callbacks && !CurLexer->Is_PragmaLexer) { @@ -161,8 +161,8 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_TokenLexer; + if (CurLexerCallback != CLK_LexAfterModuleImport) + CurLexerCallback = CLK_TokenLexer; } /// EnterTokenStream - Add a "macro" context to the top of the include stack, @@ -180,7 +180,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject) { - if (CurLexerKind == CLK_CachingLexer) { + if (CurLexerCallback == CLK_CachingLexer) { if (CachedLexPos < CachedTokens.size()) { assert(IsReinject && "new tokens in the middle of cached stream"); // We're entering tokens into the middle of our cached token stream. 
We @@ -216,8 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, PushIncludeMacroStack(); CurDirLookup = nullptr; CurTokenLexer = std::move(TokLexer); - if (CurLexerKind != CLK_LexAfterModuleImport) - CurLexerKind = CLK_TokenLexer; + if (CurLexerCallback != CLK_LexAfterModuleImport) + CurLexerCallback = CLK_TokenLexer; } /// Compute the relative path that names the given file relative to @@ -452,7 +452,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { CurLexer.reset(); CurPPLexer = nullptr; - recomputeCurLexerKind(); + recomputeCurLexerCallback(); return true; } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index ede4c51487ffbe7..f791b4d2363c9fe 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -380,15 +380,15 @@ StringRef Preprocessor::getLastMacroWithSpelling( return BestSpelling; } -void Preprocessor::recomputeCurLexerKind() { +void Preprocessor::recomputeCurLexerCallback() { if (CurLexer) - CurLexerKind = CurLexer->isDependencyDirectivesLexer() - ? CLK_DependencyDirectivesLexer - : CLK_Lexer; + CurLexerCallback = CurLexer->isDependencyDirectivesLexer() + ? 
CLK_DependencyDirectivesLexer + : CLK_Lexer; else if (CurTokenLexer) - CurLexerKind = CLK_TokenLexer; + CurLexerCallback = CLK_TokenLexer; else - CurLexerKind = CLK_CachingLexer; + CurLexerCallback = CLK_CachingLexer; } bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File, @@ -643,23 +643,7 @@ void Preprocessor::SkipTokensWhileUsingPCH() { while (true) { bool InPredefines = (CurLexer && CurLexer->getFileID() == getPredefinesFileID()); - switch (CurLexerKind) { - case CLK_Lexer: - CurLexer->Lex(Tok); - break; - case CLK_TokenLexer: - CurTokenLexer->Lex(Tok); - break; - case CLK_CachingLexer: - CachingLex(Tok); - break; - case CLK_DependencyDirectivesLexer: - CurLexer->LexDependencyDirectiveToken(Tok); - break; - case CLK_LexAfterModuleImport: - LexAfterModuleImport(Tok); - break; - } + CurLexerCallback(*this, Tok); if (Tok.is(tok::eof) && !InPredefines) { ReachedMainFileEOF = true; break; @@ -868,12 +852,12 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { Identifier.is(tok::kw_import)) && !InMacroArgs && !DisableMacroExpansion && (getLangOpts().Modules || getLangOpts().DebuggerSupport) && - CurLexerKind != CLK_CachingLexer) { + CurLexerCallback != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); NamedModuleImportPath.clear(); IsAtImport = true; ModuleImportExpectsIdentifier = true; - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; } return true; } @@ -882,27 +866,8 @@ void Preprocessor::Lex(Token &Result) { ++LexLevel; // We loop here until a lex function returns a token; this avoids recursion. 
- bool ReturnedToken; - do { - switch (CurLexerKind) { - case CLK_Lexer: - ReturnedToken = CurLexer->Lex(Result); - break; - case CLK_TokenLexer: - ReturnedToken = CurTokenLexer->Lex(Result); - break; - case CLK_CachingLexer: - CachingLex(Result); - ReturnedToken = true; - break; - case CLK_DependencyDirectivesLexer: - ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result); - break; - case CLK_LexAfterModuleImport: - ReturnedToken = LexAfterModuleImport(Result); - break; - } - } while (!ReturnedToken); + while (!CurLexerCallback(*this, Result)) + ; if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure) return; @@ -965,7 +930,7 @@ void Preprocessor::Lex(Token &Result) { NamedModuleImportPath.clear(); IsAtImport = false; ModuleImportExpectsIdentifier = true; - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; } break; } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { @@ -1166,7 +1131,7 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { /// We respond to a pp-import by importing macros from the named module. bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. - recomputeCurLexerKind(); + recomputeCurLexerCallback(); // Lex the next token. The header-name lexing rules are used at the start of // a pp-import. 
@@ -1183,7 +1148,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { Name += ":"; NamedModuleImportPath.push_back( {getIdentifierInfo(Name), Result.getLocation()}); - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; return true; } } else { @@ -1283,7 +1248,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { NamedModuleImportPath.push_back( std::make_pair(Result.getIdentifierInfo(), Result.getLocation())); ModuleImportExpectsIdentifier = false; - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; return true; } @@ -1292,7 +1257,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // attribute-specifier-seq here under the Standard C++ Modules.) if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { ModuleImportExpectsIdentifier = true; - CurLexerKind = CLK_LexAfterModuleImport; + CurLexerCallback = CLK_LexAfterModuleImport; return true; } diff --git a/clang/utils/ClangVisualizers/clang.natvis b/clang/utils/ClangVisualizers/clang.natvis index cbb63dc08de2338..44b43ec05d8af59 100644 --- a/clang/utils/ClangVisualizers/clang.natvis +++ b/clang/utils/ClangVisualizers/clang.natvis @@ -817,7 +817,7 @@ For later versions of Visual Studio, no setup is required--> <DisplayString IncludeView="cached"> {IncludeMacroStack._Mypair._Myval2._Mylast - 1,na}</DisplayString> <DisplayString Condition="CurLexer._Mypair._Myval2 != 0">{CurLexer._Mypair._Myval2,na}</DisplayString> <DisplayString Condition="CurTokenLexer._Mypair._Myval2 != 0">Expanding Macro: {CurTokenLexer._Mypair._Myval2,na}</DisplayString> - <!-- Can't use CurLexerKind because natvis sees the type rather than the variable --> + <!-- Can't use CurLexerCallback because natvis sees the type rather than the variable --> <DisplayString Condition="IncludeMacroStack._Mypair._Myval2._Mylast - IncludeMacroStack._Mypair._Myval2._Myfirst"> {this,view(cached)} </DisplayString> >From 
7ad697255b075ecb18d09bdb6b465f4e1956909b Mon Sep 17 00:00:00 2001 From: serge-sans-paille <serge.guel...@telecom-bretagne.eu> Date: Sun, 29 Oct 2023 18:17:55 +0000 Subject: [PATCH 2/2] fixup! [clang] Change representation of CurLexerKind Co-authored-by: cor3ntin <corentinja...@gmail.com> --- clang/include/clang/Lex/Preprocessor.h | 2 +- clang/lib/Lex/PPCaching.cpp | 2 +- clang/lib/Lex/PPLexerChange.cpp | 2 +- clang/lib/Lex/Preprocessor.cpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 634d3924aa2248b..3019143731db3ba 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1909,7 +1909,7 @@ class Preprocessor { /// Recompute the current lexer kind based on the CurLexer/ /// CurTokenLexer pointers. - void recomputeCurLexerCallback(); + void recomputeCurLexerKind(); /// Returns true if incremental processing is enabled bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; } diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp index b8219835a588bea..f38ff62ebf437ce 100644 --- a/clang/lib/Lex/PPCaching.cpp +++ b/clang/lib/Lex/PPCaching.cpp @@ -42,7 +42,7 @@ void Preprocessor::Backtrack() { && "EnableBacktrackAtThisPos was not called!"); CachedLexPos = BacktrackPositions.back(); BacktrackPositions.pop_back(); - recomputeCurLexerCallback(); + recomputeCurLexerKind(); } void Preprocessor::CachingLex(Token &Result) { diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 1378880f8b0e5e0..3b1b6df1dbae4e6 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -452,7 +452,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { CurLexer.reset(); CurPPLexer = nullptr; - recomputeCurLexerCallback(); + recomputeCurLexerKind(); return true; } diff --git a/clang/lib/Lex/Preprocessor.cpp 
b/clang/lib/Lex/Preprocessor.cpp index f791b4d2363c9fe..e37e370f5526b38 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -380,7 +380,7 @@ StringRef Preprocessor::getLastMacroWithSpelling( return BestSpelling; } -void Preprocessor::recomputeCurLexerCallback() { +void Preprocessor::recomputeCurLexerKind() { if (CurLexer) CurLexerCallback = CurLexer->isDependencyDirectivesLexer() ? CLK_DependencyDirectivesLexer @@ -1131,7 +1131,7 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) { /// We respond to a pp-import by importing macros from the named module. bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. - recomputeCurLexerCallback(); + recomputeCurLexerKind(); // Lex the next token. The header-name lexing rules are used at the start of // a pp-import. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits