https://github.com/yronglin updated https://github.com/llvm/llvm-project/pull/145244
>From 373daed324ca99f0b327c424c64a101f7d0f99a3 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Thu, 12 Jun 2025 21:46:13 +0800 Subject: [PATCH 1/5] [Clang] Add peekNextPPToken, makes peek next token without side-effects Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Lexer.h | 10 ++++---- clang/include/clang/Lex/Preprocessor.h | 8 ++++++- clang/include/clang/Lex/TokenLexer.h | 7 +++--- clang/lib/Lex/Lexer.cpp | 21 +++++++++-------- clang/lib/Lex/PPMacroExpansion.cpp | 32 ++++++++++++-------------- clang/lib/Lex/TokenLexer.cpp | 10 ++++---- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index bb65ae010cffa..a595cda1eaa77 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer { //===--------------------------------------------------------------------===// // Context that changes as the file is lexed. // NOTE: any state that mutates when in raw mode must have save/restore code - // in Lexer::isNextPPTokenLParen. + // in Lexer::peekNextPPToken. // BufferPtr - Current pointer into the buffer. This is the next character // to be lexed. @@ -642,10 +642,10 @@ class Lexer : public PreprocessorLexer { BufferPtr = TokEnd; } - /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a - /// tok::l_paren token, 0 if it is something else and 2 if there are no more - /// tokens in the buffer controlled by this lexer. - unsigned isNextPPTokenLParen(); + /// peekNextPPToken - Return std::nullopt if there are no more tokens in the + /// buffer controlled by this lexer, otherwise return the next unexpanded + /// token. + std::optional<Token> peekNextPPToken(); //===--------------------------------------------------------------------===// // Lexer character reading interfaces. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 78be2bd64d61c..4fe7a79393afc 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2288,7 +2288,9 @@ class Preprocessor { /// Determine whether the next preprocessor token to be /// lexed is a '('. If so, consume the token and return true, if not, this /// method should have no observable side-effect on the lexed tokens. - bool isNextPPTokenLParen(); + bool isNextPPTokenLParen() { + return peekNextPPToken().value_or(Token{}).is(tok::l_paren); + } private: /// Identifiers used for SEH handling in Borland. These are only @@ -2667,6 +2669,10 @@ class Preprocessor { void removeCachedMacroExpandedTokensOfLastLexer(); + /// Peek the next token. If so, return the token, if not, this + /// method should have no observable side-effect on the lexed tokens. + std::optional<Token> peekNextPPToken(); + /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 4d229ae610674..777b4e6266c71 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,10 +139,9 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If the next token lexed will pop this macro off the - /// expansion stack, return 2. If the next unexpanded token is a '(', return - /// 1, otherwise return 0. - unsigned isNextTokenLParen() const; + /// If the next token lexed will pop this macro off the expansion stack, + /// return std::nullopt, otherwise return the next unexpanded token. + std::optional<Token> peekNextPPToken() const; /// Lex and return a token from this macro stream. bool Lex(Token &Tok); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 93200458f04b4..8e977eac8c983 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3200,18 +3200,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { return PP->HandleEndOfFile(Result, isPragmaLexer()); } -/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from -/// the specified lexer will return a tok::l_paren token, 0 if it is something -/// else and 2 if there are no more tokens in the buffer controlled by the -/// lexer. -unsigned Lexer::isNextPPTokenLParen() { +/// peekNextPPToken - Return std::nullopt if there are no more tokens in the +/// buffer controlled by this lexer, otherwise return the next unexpanded +/// token. +std::optional<Token> Lexer::peekNextPPToken() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); if (isDependencyDirectivesLexer()) { if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) - return 2; - return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( - tok::l_paren); + return std::nullopt; + Token Result; + (void)convertDependencyDirectiveToken( + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result); + return Result; } // Switch to 'skipping' mode. This will ensure that we can lex a token @@ -3240,8 +3241,8 @@ unsigned Lexer::isNextPPTokenLParen() { LexingRawMode = false; if (Tok.is(tok::eof)) - return 2; - return Tok.is(tok::l_paren); + return std::nullopt; + return Tok; } /// Find the end of a version control conflict marker. diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 37ac1bf07e9c0..585990f60c98a 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -418,42 +418,40 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, return !llvm::is_contained(MI->params(), II); } -/// isNextPPTokenLParen - Determine whether the next preprocessor token to be -/// lexed is a '('. If so, consume the token and return true, if not, this -/// method should have no observable side-effect on the lexed tokens. -bool Preprocessor::isNextPPTokenLParen() { +/// isNextPPTokenLParen - Peek the next token. If so, return the token, if not, +/// this method should have no observable side-effect on the lexed tokens. +std::optional<Token> Preprocessor::peekNextPPToken() { // Do some quick tests for rejection cases. - unsigned Val; + std::optional<Token> Val; if (CurLexer) - Val = CurLexer->isNextPPTokenLParen(); + Val = CurLexer->peekNextPPToken(); else - Val = CurTokenLexer->isNextTokenLParen(); + Val = CurTokenLexer->peekNextPPToken(); - if (Val == 2) { + if (!Val) { // We have run off the end. If it's a source file we don't // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the // macro stack. if (CurPPLexer) - return false; + return std::nullopt; for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { if (Entry.TheLexer) - Val = Entry.TheLexer->isNextPPTokenLParen(); + Val = Entry.TheLexer->peekNextPPToken(); else - Val = Entry.TheTokenLexer->isNextTokenLParen(); + Val = Entry.TheTokenLexer->peekNextPPToken(); - if (Val != 2) + if (Val) break; // Ran off the end of a source file? if (Entry.ThePPLexer) - return false; + return std::nullopt; } } - // Okay, if we know that the token is a '(', lex it and return. Otherwise we - // have found something that isn't a '(' or we found the end of the - // translation unit. In either case, return false. - return Val == 1; + // Okay, we found the token and return. Otherwise we found the end of the + // translation unit. + return Val; } /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 6e93416e01c0c..fbb8c4262d6da 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -921,13 +921,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, } /// isNextTokenLParen - If the next token lexed will pop this macro off the -/// expansion stack, return 2. If the next unexpanded token is a '(', return -/// 1, otherwise return 0. -unsigned TokenLexer::isNextTokenLParen() const { +/// expansion stack, return std::nullopt, otherwise return the next unexpanded +/// token. +std::optional<Token> TokenLexer::peekNextPPToken() const { // Out of tokens? if (isAtEnd()) - return 2; - return Tokens[CurTokenIdx].is(tok::l_paren); + return std::nullopt; + return Tokens[CurTokenIdx]; } /// isParsingPreprocessorDirective - Return true if we are in the middle of a >From c963c3a9cdb7835e5c031bf7a1f82e61f0c15e42 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Sat, 14 Jun 2025 14:29:16 +0800 Subject: [PATCH 2/5] Update peekNextToken to isNextPPTokenOneOf Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Preprocessor.h | 41 +++++++++++++++++++++----- clang/lib/Lex/PPDirectives.cpp | 4 +-- clang/lib/Lex/PPMacroExpansion.cpp | 36 ---------------------- clang/lib/Lex/Preprocessor.cpp | 4 +-- 4 files changed, 37 insertions(+), 48 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 4fe7a79393afc..f5e85b3da1ba1 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2285,11 +2285,40 @@ class Preprocessor { } } - /// Determine whether the next preprocessor token to be - /// lexed is a '('. If so, consume the token and return true, if not, this + /// Check whether the next pp-token is one of the specificed token kind. this /// method should have no observable side-effect on the lexed tokens. - bool isNextPPTokenLParen() { - return peekNextPPToken().value_or(Token{}).is(tok::l_paren); + template <tok::TokenKind K, tok::TokenKind... Ks> bool isNextPPTokenOneOf() { + // Do some quick tests for rejection cases. + std::optional<Token> Val; + if (CurLexer) + Val = CurLexer->peekNextPPToken(); + else + Val = CurTokenLexer->peekNextPPToken(); + + if (!Val) { + // We have run off the end. If it's a source file we don't + // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the + // macro stack. + if (CurPPLexer) + return false; + for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { + if (Entry.TheLexer) + Val = Entry.TheLexer->peekNextPPToken(); + else + Val = Entry.TheTokenLexer->peekNextPPToken(); + + if (Val) + break; + + // Ran off the end of a source file? + if (Entry.ThePPLexer) + return false; + } + } + + // Okay, we found the token and return. Otherwise we found the end of the + // translation unit. + return Val->is(K) || (... || Val->is(Ks)); } private: @@ -2669,10 +2698,6 @@ class Preprocessor { void removeCachedMacroExpandedTokensOfLastLexer(); - /// Peek the next token. If so, return the token, if not, this - /// method should have no observable side-effect on the lexed tokens. - std::optional<Token> peekNextPPToken(); - /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 04a30f66fb736..be061f462f65a 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -183,9 +183,9 @@ static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) { AttributeCommonInfo::AttrArgsInfo AttrArgsInfo = AttributeCommonInfo::getCXX11AttrArgsInfo(II); if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required) - return PP.isNextPPTokenLParen(); + return PP.isNextPPTokenOneOf<tok::l_paren>(); - return !PP.isNextPPTokenLParen() || + return !PP.isNextPPTokenOneOf<tok::l_paren>() || AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional; } return false; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 585990f60c98a..b8b91e32179af 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -418,42 +418,6 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, return !llvm::is_contained(MI->params(), II); } -/// isNextPPTokenLParen - Peek the next token. If so, return the token, if not, -/// this method should have no observable side-effect on the lexed tokens. -std::optional<Token> Preprocessor::peekNextPPToken() { - // Do some quick tests for rejection cases. - std::optional<Token> Val; - if (CurLexer) - Val = CurLexer->peekNextPPToken(); - else - Val = CurTokenLexer->peekNextPPToken(); - - if (!Val) { - // We have run off the end. If it's a source file we don't - // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the - // macro stack. - if (CurPPLexer) - return std::nullopt; - for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { - if (Entry.TheLexer) - Val = Entry.TheLexer->peekNextPPToken(); - else - Val = Entry.TheTokenLexer->peekNextPPToken(); - - if (Val) - break; - - // Ran off the end of a source file? - if (Entry.ThePPLexer) - return std::nullopt; - } - } - - // Okay, we found the token and return. Otherwise we found the end of the - // translation unit. - return Val; -} - /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be /// expanded as a macro, handle it and return the next token as 'Identifier'. bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 21fc7a2b6fae2..f28113e569218 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -811,14 +811,14 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { if (!Identifier.isExpandDisabled() && MI->isEnabled()) { // C99 6.10.3p10: If the preprocessing token immediately after the // macro name isn't a '(', this macro should not be expanded. - if (!MI->isFunctionLike() || isNextPPTokenLParen()) + if (!MI->isFunctionLike() || isNextPPTokenOneOf<tok::l_paren>()) return HandleMacroExpandedIdentifier(Identifier, MD); } else { // C99 6.10.3.4p2 says that a disabled macro may never again be // expanded, even if it's in a context where it could be expanded in the // future. Identifier.setFlag(Token::DisableExpand); - if (MI->isObjectLike() || isNextPPTokenLParen()) + if (MI->isObjectLike() || isNextPPTokenOneOf<tok::l_paren>()) Diag(Identifier, diag::pp_disabled_macro_expansion); } } >From 6fe2cd4c723695c0ce6a4d2ced7a2b2628cdaf5d Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Mon, 23 Jun 2025 01:30:20 +0800 Subject: [PATCH 3/5] [clang][Preprocessor] Handle the first pp-token in EnterMainSourceFile Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Preprocessor.h | 19 +++++--------- clang/lib/Lex/Lexer.cpp | 9 ++----- clang/lib/Lex/PPDirectives.cpp | 3 --- clang/lib/Lex/PPMacroExpansion.cpp | 3 --- clang/lib/Lex/Preprocessor.cpp | 15 ++++++++++++ clang/lib/Sema/SemaModule.cpp | 8 +++--- clang/unittests/Lex/LexerTest.cpp | 34 ++++++++++++++------------ 7 files changed, 45 insertions(+), 46 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 0ec1cb4d0c5d8..a3cf18f21ca40 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -351,7 +351,7 @@ class Preprocessor { bool LastTokenWasAt = false; /// First pp-token in current translation unit. - std::optional<Token> FirstPPToken; + SourceLocation FirstPPTokenLoc; /// A position within a C++20 import-seq. class StdCXXImportSeq { @@ -1769,20 +1769,13 @@ class Preprocessor { std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current, bool ForHasEmbed); - /// Whether the preprocessor already seen the first pp-token in main file. - bool hasSeenMainFileFirstPPToken() const { return FirstPPToken.has_value(); } - - /// Record first pp-token and check if it has a Token::FirstPPToken flag. - void HandleMainFileFirstPPToken(const Token &Tok) { - if (!hasSeenMainFileFirstPPToken() && Tok.isFirstPPToken() && - SourceMgr.isWrittenInMainFile(Tok.getLocation())) - FirstPPToken = Tok; + /// Get the start location of the first pp-token in main file. + SourceLocation getMainFileFirstPPTokenLoc() const { + assert(FirstPPTokenLoc.isValid() && + "Did not see the first pp-token in the main file"); + return FirstPPTokenLoc; } - Token getMainFileFirstPPToken() const { - assert(FirstPPToken && "First main file pp-token doesn't exists"); - return *FirstPPToken; - } bool LexAfterModuleImport(Token &Result); void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index f4d16ecce393c..42ea7edf3aaad 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3228,6 +3228,7 @@ std::optional<Token> Lexer::peekNextPPToken() { bool atStartOfLine = IsAtStartOfLine; bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine; bool leadingSpace = HasLeadingSpace; + bool isFirstPPToken = IsFirstPPToken; Token Tok; Lex(Tok); @@ -3238,7 +3239,7 @@ std::optional<Token> Lexer::peekNextPPToken() { HasLeadingSpace = leadingSpace; IsAtStartOfLine = atStartOfLine; IsAtPhysicalStartOfLine = atPhysicalStartOfLine; - + IsFirstPPToken = isFirstPPToken; // Restore the lexer back to non-skipping mode. LexingRawMode = false; @@ -3740,10 +3741,6 @@ bool Lexer::Lex(Token &Result) { bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine); // (After the LexTokenInternal call, the lexer might be destroyed.) assert((returnedToken || !isRawLex) && "Raw lex must succeed"); - - if (returnedToken && Result.isFirstPPToken() && PP && - !PP->hasSeenMainFileFirstPPToken()) - PP->HandleMainFileFirstPPToken(Result); return returnedToken; } @@ -4547,8 +4544,6 @@ const char *Lexer::convertDependencyDirectiveToken( Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length); BufferPtr = TokPtr + DDTok.Length; - if (PP && !PP->hasSeenMainFileFirstPPToken() && Result.isFirstPPToken()) - PP->HandleMainFileFirstPPToken(Result); return TokPtr; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index c8974e5a3528c..be061f462f65a 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1242,9 +1242,6 @@ void Preprocessor::HandleDirective(Token &Result) { // pp-directive. bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); - if (!hasSeenMainFileFirstPPToken()) - HandleMainFileFirstPPToken(Result); - // Save the '#' token in case we need to return it later. Token SavedHash = Result; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 709cf3bb87c8e..b8b91e32179af 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -431,9 +431,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // to disable the optimization in this case. if (CurPPLexer) CurPPLexer->MIOpt.ExpandedMacro(); - if (!hasSeenMainFileFirstPPToken()) - HandleMainFileFirstPPToken(Identifier); - // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. if (MI->isBuiltinMacro()) { if (Callbacks) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 7fecbe9eee53c..9329f9fd4460a 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -566,6 +566,21 @@ void Preprocessor::EnterMainSourceFile() { // #imported, it won't be re-entered. if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(MainFileID)) markIncluded(*FE); + + // Record the first PP token in the main file. This is used to generate + // better diagnostics for C++ modules. + // + // // This is a comment. + // #define FOO int // note: add 'module;' to the start of the file + // ^ FirstPPToken // to introduce a global module fragment. + // + // export module M; // error: module declaration must occur + // // at the start of the translation unit. + if (getLangOpts().CPlusPlusModules) { + std::optional<Token> FirstPPTok = CurLexer->peekNextPPToken(); + if (FirstPPTok && FirstPPTok->isFirstPPToken()) + FirstPPTokenLoc = FirstPPTok->getLocation(); + } } // Preprocess Predefines to populate the initial preprocessor state. diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index fe70ce3fba6a5..7c982bcd63d73 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -337,11 +337,9 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // tokens in a file (excluding the global module fragment.). if (getLangOpts().CPlusPlusModules && !IntroducerIsFirstPPToken && !SeenGMF) { Diag(ModuleLoc, diag::err_module_decl_not_at_start); - SourceLocation BeginLoc = PP.getMainFileFirstPPToken().getLocation(); - if (BeginLoc.isValid()) { - Diag(BeginLoc, diag::note_global_module_introducer_missing) - << FixItHint::CreateInsertion(BeginLoc, "module;\n"); - } + SourceLocation BeginLoc = PP.getMainFileFirstPPTokenLoc(); + Diag(BeginLoc, diag::note_global_module_introducer_missing) + << FixItHint::CreateInsertion(BeginLoc, "module;\n"); } // C++23 [module.unit]p1: ... The identifiers module and import shall not diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp index 33c8abbec35a3..2adb55484be88 100644 --- a/clang/unittests/Lex/LexerTest.cpp +++ b/clang/unittests/Lex/LexerTest.cpp @@ -49,8 +49,7 @@ class LexerTest : public ::testing::Test { } std::unique_ptr<Preprocessor> CreatePP(StringRef Source, - TrivialModuleLoader &ModLoader, - StringRef PreDefines = {}) { + TrivialModuleLoader &ModLoader) { std::unique_ptr<llvm::MemoryBuffer> Buf = llvm::MemoryBuffer::getMemBuffer(Source); SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf))); @@ -63,7 +62,7 @@ class LexerTest : public ::testing::Test { /*IILookup =*/nullptr, /*OwnsHeaderSearch =*/false); if (!PreDefines.empty()) - PP->setPredefines(PreDefines.str()); + PP->setPredefines(PreDefines); PP->Initialize(*Target); PP->EnterMainSourceFile(); return PP; @@ -111,6 +110,7 @@ class LexerTest : public ::testing::Test { std::shared_ptr<TargetOptions> TargetOpts; IntrusiveRefCntPtr<TargetInfo> Target; std::unique_ptr<Preprocessor> PP; + std::string PreDefines; }; TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { @@ -773,6 +773,7 @@ TEST(LexerPreambleTest, PreambleBounds) { } TEST_F(LexerTest, CheckFirstPPToken) { + LangOpts.CPlusPlusModules = true; { TrivialModuleLoader ModLoader; auto PP = CreatePP("// This is a comment\n" @@ -781,9 +782,8 @@ TEST_F(LexerTest, CheckFirstPPToken) { Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::kw_int)); + EXPECT_TRUE(PP->getMainFileFirstPPTokenLoc().isValid()); + EXPECT_EQ(PP->getMainFileFirstPPTokenLoc(), Tok.getLocation()); } { TrivialModuleLoader ModLoader; @@ -794,24 +794,28 @@ TEST_F(LexerTest, CheckFirstPPToken) { Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::hash)); + EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok, + PP->getSourceManager(), PP->getLangOpts(), + /*IgnoreWhiteSpace=*/false)); + EXPECT_TRUE(Tok.isFirstPPToken()); + EXPECT_TRUE(Tok.is(tok::hash)); } { + PreDefines = "#define FOO int\n"; TrivialModuleLoader ModLoader; auto PP = CreatePP("// This is a comment\n" "FOO a;", - ModLoader, "#define FOO int\n"); + ModLoader); Token Tok; PP->Lex(Tok); EXPECT_TRUE(Tok.is(tok::kw_int)); - EXPECT_TRUE(PP->hasSeenMainFileFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().isFirstPPToken()); - EXPECT_TRUE(PP->getMainFileFirstPPToken().is(tok::identifier)); - EXPECT_TRUE( - PP->getMainFileFirstPPToken().getIdentifierInfo()->isStr("FOO")); + EXPECT_FALSE(Lexer::getRawToken(PP->getMainFileFirstPPTokenLoc(), Tok, + PP->getSourceManager(), PP->getLangOpts(), + /*IgnoreWhiteSpace=*/false)); + EXPECT_TRUE(Tok.isFirstPPToken()); + EXPECT_TRUE(Tok.is(tok::raw_identifier)); + EXPECT_TRUE(Tok.getRawIdentifier() == "FOO"); } } } // anonymous namespace >From b3b3219b3d2e38828ba5f79acc81eb91b7359cb1 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Tue, 24 Jun 2025 19:09:20 +0800 Subject: [PATCH 4/5] [NFC] Update comments in code Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Preprocessor.h | 2 +- clang/include/clang/Lex/TokenLexer.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index a3cf18f21ca40..55661e8d4256f 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -350,7 +350,7 @@ class Preprocessor { /// Whether the last token we lexed was an '@'. bool LastTokenWasAt = false; - /// First pp-token in current translation unit. + /// First pp-token source location in current translation unit. SourceLocation FirstPPTokenLoc; /// A position within a C++20 import-seq. diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 777b4e6266c71..620d3d620d7a1 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,8 +139,9 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If the next token lexed will pop this macro off the expansion stack, - /// return std::nullopt, otherwise return the next unexpanded token. + /// If TokenLexer::isAtEnd returns true (the next token lexed will pop this + /// macro off the expansion stack, return std::nullopt, otherwise return the + /// next unexpanded token. std::optional<Token> peekNextPPToken() const; /// Lex and return a token from this macro stream. >From b4ab704003d7368335ec780009fbd462aceef6f3 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Wed, 25 Jun 2025 00:31:40 +0800 Subject: [PATCH 5/5] Fix unclosed parentheses in peekNextToken's comment Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/TokenLexer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 620d3d620d7a1..7ac933d8f9d45 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,8 +139,8 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If TokenLexer::isAtEnd returns true (the next token lexed will pop this - /// macro off the expansion stack, return std::nullopt, otherwise return the + /// If TokenLexer::isAtEnd returns true(the next token lexed will pop this + /// macro off the expansion stack), return std::nullopt, otherwise return the /// next unexpanded token. std::optional<Token> peekNextPPToken() const; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits