llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: None (yronglin) <details> <summary>Changes</summary> Depends on [[clang][Preprocessor] Add peekNextPPToken, makes look ahead next token without side-effects](https://github.com/llvm/llvm-project/pull/143898). This PR fix the performance regression that introduced in https://github.com/llvm/llvm-project/pull/144233. The original PR(https://github.com/llvm/llvm-project/pull/144233) handle the first pp-token in the main source file in the macro definition/expansion and `Lexer::Lex`, but the lexer is almost always on the hot path, we may hit a performance regression. In this PR, we handle the first pp-token in `Preprocessor::EnterMainSourceFile`. --- Full diff: https://github.com/llvm/llvm-project/pull/145244.diff 10 Files Affected: - (modified) clang/include/clang/Lex/Lexer.h (+5-5) - (modified) clang/include/clang/Lex/Preprocessor.h (+40-16) - (modified) clang/include/clang/Lex/TokenLexer.h (+3-4) - (modified) clang/lib/Lex/Lexer.cpp (+13-17) - (modified) clang/lib/Lex/PPDirectives.cpp (+2-5) - (modified) clang/lib/Lex/PPMacroExpansion.cpp (-41) - (modified) clang/lib/Lex/Preprocessor.cpp (+17-2) - (modified) clang/lib/Lex/TokenLexer.cpp (+5-5) - (modified) clang/lib/Sema/SemaModule.cpp (+3-5) - (modified) clang/unittests/Lex/LexerTest.cpp (+19-15) ``````````diff diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index ca812ba1583fb..7a178b5b5c85b 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer { //===--------------------------------------------------------------------===// // Context that changes as the file is lexed. // NOTE: any state that mutates when in raw mode must have save/restore code - // in Lexer::isNextPPTokenLParen. + // in Lexer::peekNextPPToken. // BufferPtr - Current pointer into the buffer. This is the next character // to be lexed. @@ -645,10 +645,10 @@ class Lexer : public PreprocessorLexer { BufferPtr = TokEnd; } - /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a - /// tok::l_paren token, 0 if it is something else and 2 if there are no more - /// tokens in the buffer controlled by this lexer. - unsigned isNextPPTokenLParen(); + /// peekNextPPToken - Return std::nullopt if there are no more tokens in the + /// buffer controlled by this lexer, otherwise return the next unexpanded + /// token. + std::optional<Token> peekNextPPToken(); //===--------------------------------------------------------------------===// // Lexer character reading interfaces. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 47830b428c8ad..a3cf18f21ca40 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -351,7 +351,7 @@ class Preprocessor { bool LastTokenWasAt = false; /// First pp-token in current translation unit. - std::optional<Token> FirstPPToken; + SourceLocation FirstPPTokenLoc; /// A position within a C++20 import-seq. class StdCXXImportSeq { @@ -1769,20 +1769,13 @@ class Preprocessor { std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, bool ForHasEmbed); - /// Whether the preprocessor already seen the first pp-token in main file. - bool hasSeenMainFileFirstPPToken() const { return FirstPPToken.has_value(); } - - /// Record first pp-token and check if it has a Token::FirstPPToken flag. - void HandleMainFileFirstPPToken(const Token &Tok) { - if (!hasSeenMainFileFirstPPToken() && Tok.isFirstPPToken() && - SourceMgr.isWrittenInMainFile(Tok.getLocation())) - FirstPPToken = Tok; + /// Get the start location of the first pp-token in main file. + SourceLocation getMainFileFirstPPTokenLoc() const { + assert(FirstPPTokenLoc.isValid() && + "Did not see the first pp-token in the main file"); + return FirstPPTokenLoc; } - Token getMainFileFirstPPToken() const { - assert(FirstPPToken && "First main file pp-token doesn't exists"); - return *FirstPPToken; - } bool LexAfterModuleImport(Token &Result); void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); @@ -2302,10 +2295,41 @@ class Preprocessor { } } - /// Determine whether the next preprocessor token to be - /// lexed is a '('. If so, consume the token and return true, if not, this + /// Check whether the next pp-token is one of the specificed token kind. this /// method should have no observable side-effect on the lexed tokens. - bool isNextPPTokenLParen(); + template <tok::TokenKind K, tok::TokenKind... Ks> bool isNextPPTokenOneOf() { + // Do some quick tests for rejection cases. + std::optional<Token> Val; + if (CurLexer) + Val = CurLexer->peekNextPPToken(); + else + Val = CurTokenLexer->peekNextPPToken(); + + if (!Val) { + // We have run off the end. If it's a source file we don't + // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the + // macro stack. + if (CurPPLexer) + return false; + for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { + if (Entry.TheLexer) + Val = Entry.TheLexer->peekNextPPToken(); + else + Val = Entry.TheTokenLexer->peekNextPPToken(); + + if (Val) + break; + + // Ran off the end of a source file? + if (Entry.ThePPLexer) + return false; + } + } + + // Okay, we found the token and return. Otherwise we found the end of the + // translation unit. + return Val->is(K) || (... || Val->is(Ks)); + } private: /// Identifiers used for SEH handling in Borland. These are only diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 4d229ae610674..777b4e6266c71 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,10 +139,9 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If the next token lexed will pop this macro off the - /// expansion stack, return 2. If the next unexpanded token is a '(', return - /// 1, otherwise return 0. - unsigned isNextTokenLParen() const; + /// If the next token lexed will pop this macro off the expansion stack, + /// return std::nullopt, otherwise return the next unexpanded token. + std::optional<Token> peekNextPPToken() const; /// Lex and return a token from this macro stream. bool Lex(Token &Tok); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index b61ea3b1614c7..42ea7edf3aaad 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3202,18 +3202,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { return PP->HandleEndOfFile(Result, isPragmaLexer()); } -/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from -/// the specified lexer will return a tok::l_paren token, 0 if it is something -/// else and 2 if there are no more tokens in the buffer controlled by the -/// lexer. -unsigned Lexer::isNextPPTokenLParen() { +/// peekNextPPToken - Return std::nullopt if there are no more tokens in the +/// buffer controlled by this lexer, otherwise return the next unexpanded +/// token. +std::optional<Token> Lexer::peekNextPPToken() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); if (isDependencyDirectivesLexer()) { if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) - return 2; - return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( - tok::l_paren); + return std::nullopt; + Token Result; + (void)convertDependencyDirectiveToken( + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result); + return Result; } // Switch to 'skipping' mode. This will ensure that we can lex a token @@ -3227,6 +3228,7 @@ unsigned Lexer::isNextPPTokenLParen() { bool atStartOfLine = IsAtStartOfLine; bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; bool leadingSpace = HasLeadingSpace; + bool isFirstPPToken = IsFirstPPToken; Token Tok; Lex(Tok); @@ -3237,13 +3239,13 @@ unsigned Lexer::isNextPPTokenLParen() { HasLeadingSpace = leadingSpace; IsAtStartOfLine = atStartOfLine; IsAtPhysicalStartOfLine = atPhysicalStartOfLine; - + IsFirstPPToken = isFirstPPToken; // Restore the lexer back to non-skipping mode. LexingRawMode = false; if (Tok.is(tok::eof)) - return 2; - return Tok.is(tok::l_paren); + return std::nullopt; + return Tok; } /// Find the end of a version control conflict marker. @@ -3739,10 +3741,6 @@ bool Lexer::Lex(Token &Result) { bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); - - if (returnedToken && Result.isFirstPPToken() && PP && - !PP->hasSeenMainFileFirstPPToken()) - PP->HandleMainFileFirstPPToken(Result); return returnedToken; } @@ -4546,8 +4544,6 @@ const char *Lexer::convertDependencyDirectiveToken( Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length); BufferPtr = TokPtr + DDTok.Length; - if (PP && !PP->hasSeenMainFileFirstPPToken() && Result.isFirstPPToken()) - PP->HandleMainFileFirstPPToken(Result); return TokPtr; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 70934b9b1dec3..be061f462f65a 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -183,9 +183,9 @@ static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) { AttributeCommonInfo::AttrArgsInfo AttrArgsInfo = AttributeCommonInfo::getCXX11AttrArgsInfo(II); if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required) - return PP.isNextPPTokenLParen(); + return PP.isNextPPTokenOneOf<tok::l_paren>(); - return !PP.isNextPPTokenLParen() || + return !PP.isNextPPTokenOneOf<tok::l_paren>() || AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional; } return false; @@ -1242,9 +1242,6 @@ void Preprocessor::HandleDirective(Token &Result) { // pp-directive. bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); - if (!hasSeenMainFileFirstPPToken()) - HandleMainFileFirstPPToken(Result); - // Save the '#' token in case we need to return it later. Token SavedHash = Result; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 97bdeb873d699..b8b91e32179af 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -418,44 +418,6 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, return !llvm::is_contained(MI->params(), II); } -/// isNextPPTokenLParen - Determine whether the next preprocessor token to be -/// lexed is a '('. If so, consume the token and return true, if not, this -/// method should have no observable side-effect on the lexed tokens. -bool Preprocessor::isNextPPTokenLParen() { - // Do some quick tests for rejection cases. - unsigned Val; - if (CurLexer) - Val = CurLexer->isNextPPTokenLParen(); - else - Val = CurTokenLexer->isNextTokenLParen(); - - if (Val == 2) { - // We have run off the end. If it's a source file we don't - // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the - // macro stack. - if (CurPPLexer) - return false; - for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { - if (Entry.TheLexer) - Val = Entry.TheLexer->isNextPPTokenLParen(); - else - Val = Entry.TheTokenLexer->isNextTokenLParen(); - - if (Val != 2) - break; - - // Ran off the end of a source file? - if (Entry.ThePPLexer) - return false; - } - } - - // Okay, if we know that the token is a '(', lex it and return. Otherwise we - // have found something that isn't a '(' or we found the end of the - // translation unit. In either case, return false. - return Val == 1; -} - /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be /// expanded as a macro, handle it and return the next token as 'Identifier'. bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, @@ -469,9 +431,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // to disable the optimization in this case. if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro(); - if (!hasSeenMainFileFirstPPToken()) - HandleMainFileFirstPPToken(Identifier); - // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. if (MI->isBuiltinMacro()) { if (Callbacks) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 18b2f5f02d6c7..9329f9fd4460a 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -566,6 +566,21 @@ void Preprocessor::EnterMainSourceFile() { // #imported, it won't be re-entered. if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID)) markIncluded(*FE); + + // Record the first PP token in the main file. This is used to generate + // better diagnostics for C++ modules. + // + // // This is a comment. + // #define FOO int // note: add 'module;' to the start of the file + // ^ FirstPPToken // to introduce a global module fragment. + // + // export module M; // error: module declaration must occur + // // at the start of the translation unit. + if (getLangOpts().CPlusPlusModules) { + std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken(); + if (FirstPPTok && FirstPPTok->isFirstPPToken()) + FirstPPTokenLoc = FirstPPTok->getLocation(); + } } // Preprocess Predefines to populate the initial preprocessor state. @@ -813,14 +828,14 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (!Identifier.isExpandDisabled() && MI->isEnabled()) { // C99 6.10.3p10: If the preprocessing token immediately after the // macro name isn't a '(', this macro should not be expanded. - if (!MI->isFunctionLike() || isNextPPTokenLParen()) + if (!MI->isFunctionLike() || isNextPPTokenOneOf<tok::l_paren>()) return HandleMacroExpandedIdentifier(Identifier, MD); } else { // C99 6.10.3.4p2 says that a disabled macro may never again be // expanded, even if it's in a context where it could be expanded in the // future. Identifier.setFlag(Token::DisableExpand); - if (MI->isObjectLike() || isNextPPTokenLParen()) + if (MI->isObjectLike() || isNextPPTokenOneOf<tok::l_paren>()) Diag(Identifier, diag::pp_disabled_macro_expansion); } } diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 6e93416e01c0c..fbb8c4262d6da 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -921,13 +921,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, } /// isNextTokenLParen - If the next token lexed will pop this macro off the -/// expansion stack, return 2. If the next unexpanded token is a '(', return -/// 1, otherwise return 0. -unsigned TokenLexer::isNextTokenLParen() const { +/// expansion stack, return std::nullopt, otherwise return the next unexpanded +/// token. +std::optional<Token> TokenLexer::peekNextPPToken() const { // Out of tokens? if (isAtEnd()) - return 2; - return Tokens[CurTokenIdx].is(tok::l_paren); + return std::nullopt; + return Tokens[CurTokenIdx]; } /// isParsingPreprocessorDirective - Return true if we are in the middle of a diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index fe70ce3fba6a5..7c982bcd63d73 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -337,11 +337,9 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // tokens in a file (excluding the global module fragment.). if (getLangOpts().CPlusPlusModules && !IntroducerIsFirstPPToken && !SeenGMF) { Diag(ModuleLoc, diag::err_module_decl_not_at_start); - SourceLocation BeginLoc = PP.getMainFileFirstPPToken().getLocation(); - if (BeginLoc.isValid()) { - Diag(BeginLoc, diag::note_global_module_introducer_missing) - << FixItHint::CreateInsertion(BeginLoc, "module;\n"); - } + SourceLocation BeginLoc = PP.getMainFileFirstPPTokenLoc(); + Diag(BeginLoc, diag::note_global_module_introducer_missing) + << FixItHint::CreateInsertion(BeginLoc, "module;\n"); } // C++23 [module.unit]p1: ... The identifiers module and import shall not diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index 33c8abbec35a3..2adb55484be88 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -49,8 +49,7 @@ class LexerTest : public ::testing::Test { } std::unique_ptr<Preprocessor> CreatePP(StringRef Source, - TrivialModuleLoader &ModLoader, - StringRef PreDefines = {}) { + TrivialModuleLoader &ModLoader) { std::unique_ptr<llvm::MemoryBuffer> Buf = llvm::MemoryBuffer::getMemBuffer(Source); SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); @@ -63,7 +62,7 @@ class LexerTest : public ::testing::Test { /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); if (!PreDefines.empty()) - PP->setPredefines(PreDefines.str()); + PP->setPredefines(PreDefines); PP->Initialize(*Target); PP->EnterMainSourceFile(); return PP; @@ -111,6 +110,7 @@ class LexerTest : public ::testing::Test { std::shared_ptr<TargetOptions> TargetOpts; IntrusiveRefCntPtr<TargetInfo> Target; std::unique_ptr<Preprocessor> PP; + std::string PreDefines; }; TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { @@ -773,6 +773,7 @@ TEST(LexerPreambleTest, PreambleBounds) { } TEST_F(LexerTest, CheckFirstPPToken) { + LangOpts.CPlusPlusModules = true; { TrivialModuleLoader ModLoader; auto PP = CreatePP("// This is a comment\n" @@ -781,9 +782,8 @@ TEST_F(LexerTest, CheckFirstPPToken) { Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::kw_int)); + EXPECT_TRUE(PP->getMainFileFirstPPTokenLoc().isValid()); + EXPECT_EQ(PP->getMainFileFirstPPTokenLoc(), Tok.getLocation()); } { TrivialModuleLoader ModLoader; @@ -794,24 +794,28 @@ TEST_F(LexerTest, CheckFirstPPToken) { Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::hash)); + EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok, + PP->getSourceManager(), PP->getLangOpts(), + /*IgnoreWhiteSpace=*/false)); + EXPECT_TRUE(Tok.isFirstPPToken()); + EXPECT_TRUE(Tok.is(tok::hash)); } { + PreDefines = "#define FOO int\n"; TrivialModuleLoader ModLoader; auto PP = CreatePP("// This is a comment\n" "FOO a;", - ModLoader, "#define FOO int\n"); + ModLoader); Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::identifier)); - EXPECT_TRUE( - PP->getMainFileFirstPPToken().getIdentifierInfo()->isStr("FOO")); + EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok, + PP->getSourceManager(), PP->getLangOpts(), + /*IgnoreWhiteSpace=*/false)); + EXPECT_TRUE(Tok.isFirstPPToken()); + EXPECT_TRUE(Tok.is(tok::raw_identifier)); + EXPECT_TRUE(Tok.getRawIdentifier() == "FOO"); } } } // anonymous namespace `````````` </details> https://github.com/llvm/llvm-project/pull/145244 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits