https://github.com/yronglin updated https://github.com/llvm/llvm-project/pull/191004
>From 373880aed203efd8521dfb76a3f52fedee2592dc Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Thu, 9 Apr 2026 00:19:55 +0800 Subject: [PATCH 1/7] [C++][Modules][Preprocessor] Clang should not convert a import preprocessing token to contextual keyword if a digraph character following import Signed-off-by: yronglin <[email protected]> --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Lex/Lexer.h | 4 ++++ clang/lib/Lex/Lexer.cpp | 26 ++++++++++++++-------- clang/lib/Lex/Preprocessor.cpp | 33 +++++++++++++++++++++++----- clang/test/CXX/module/cpp.pre/p1.cpp | 26 ++++++++++++++++++++++ 5 files changed, 75 insertions(+), 15 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2da7175b51ea3..9c0155265874b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -406,6 +406,7 @@ Bug Fixes in This Version - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643) - Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694) - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121) +- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 0459a863bc08d..8e4cc7a95b327 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -732,6 +732,10 @@ class Lexer : public PreprocessorLexer { /// otherwise return P. static const char *SkipEscapedNewLines(const char *P); + /// SkipHorizontalWhitespace - Skip the horizontak whitespace characters and + /// returns the advanced pointer. 
+ static const char *SkipHorizontalWhitespace(const char *Ptr); + /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a /// diagnostic. static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr, diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 10246552bb13d..29caeb943e3df 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1336,6 +1336,18 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { } } +const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) { + // Small amounts of horizontal whitespace is very common between tokens. + // Check for space character separately to skip the expensive + // isHorizontalWhitespace() check + if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) { + do { + ++Ptr; + } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)); + } + return Ptr; +} + std::optional<Token> Lexer::findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, @@ -3764,16 +3776,12 @@ bool Lexer::LexTokenInternal(Token &Result) { assert(!Result.hasPtrData() && "Result has not been reset"); // CurPtr - Cache BufferPtr in an automatic variable. - const char *CurPtr = BufferPtr; - - // Small amounts of horizontal whitespace is very common between tokens. - // Check for space character separately to skip the expensive - // isHorizontalWhitespace() check - if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) { - do { - ++CurPtr; - } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)); + const char *CurPtr = SkipHorizontalWhitespace(BufferPtr); + /// CurPtr has been advanced forward, indicating that a horizontal whitespace + /// character has been encountered. Check if the Lexer is in keep whitespace + /// mode. + if (CurPtr != BufferPtr) { // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the // whitespace. 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index c430da67c1469..4130e64be855e 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1380,13 +1380,34 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective( CurPPLexer->ParsingPreprocessorDirective, true); - // The next token may be an angled string literal after import keyword. - llvm::SaveAndRestore<bool> SavedParsingFilemame( - CurPPLexer->ParsingFilename, - Result.getIdentifierInfo()->isImportKeyword()); + bool ParsingFilename = false; + if (Result.getIdentifierInfo()->isImportKeyword()) { + if (getLangOpts().Digraphs && CurLexer && + CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size()) { + // If the import preprocessing token folled by a digraph character '<:', + // the import preprocessing should not traited as a import contextual + // keyword. Eg. + // int + // import <:10 + // :>; + // + // This is a array definition, and equivalent to: + // + // int import[10]; + const char *CurPtr = CurLexer->getBufferLocation(); + CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr); + auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts()); + auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts()); + if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%')) + return false; + } + ParsingFilename = true; + } - std::optional<Token> NextTok = - CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken(); + // The next token may be an angled string literal after import keyword. 
+ llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename, + ParsingFilename); + std::optional<Token> NextTok = peekNextPPToken(); if (!NextTok) return false; diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp index 989915004ff57..0e2fb65390e99 100644 --- a/clang/test/CXX/module/cpp.pre/p1.cpp +++ b/clang/test/CXX/module/cpp.pre/p1.cpp @@ -38,6 +38,8 @@ // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify //--- hash.cpp @@ -205,3 +207,27 @@ export module m; // expected-error {{module directive lines are not allowed on l // expected-error {{module declaration must occur at the start of the translation unit}} \ // expected-note@#1 {{add 'module;'}} #endif + +//--- digraph.cpp +// expected-no-diagnostics +int +import <:10 +:>; + +void foo() { + for (int i = 0; i < 10; ++i) + import[i] = i; +} + +//--- digraph2.cpp +// expected-no-diagnostics +using import = int; + +void bar(int); + +void foo(int val = +import <%%> +) { + bar(val); +} + >From 0c0d98300b68c2237876aef08150b0f160b08470 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Thu, 9 Apr 2026 21:54:18 +0800 Subject: [PATCH 2/7] Revert "[C++][Modules][Preprocessor] Clang should not convert a import preprocessing token to contextual keyword if a digraph character following import" This reverts commit 373880aed203efd8521dfb76a3f52fedee2592dc. 
--- clang/docs/ReleaseNotes.rst | 1 - clang/include/clang/Lex/Lexer.h | 4 ---- clang/lib/Lex/Lexer.cpp | 26 ++++++++-------------- clang/lib/Lex/Preprocessor.cpp | 33 +++++----------------------- clang/test/CXX/module/cpp.pre/p1.cpp | 26 ---------------------- 5 files changed, 15 insertions(+), 75 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9c0155265874b..2da7175b51ea3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -406,7 +406,6 @@ Bug Fixes in This Version - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643) - Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694) - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121) -- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 8e4cc7a95b327..0459a863bc08d 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -732,10 +732,6 @@ class Lexer : public PreprocessorLexer { /// otherwise return P. static const char *SkipEscapedNewLines(const char *P); - /// SkipHorizontalWhitespace - Skip the horizontak whitespace characters and - /// returns the advanced pointer. - static const char *SkipHorizontalWhitespace(const char *Ptr); - /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a /// diagnostic. 
static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr, diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 29caeb943e3df..10246552bb13d 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1336,18 +1336,6 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { } } -const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) { - // Small amounts of horizontal whitespace is very common between tokens. - // Check for space character separately to skip the expensive - // isHorizontalWhitespace() check - if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) { - do { - ++Ptr; - } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)); - } - return Ptr; -} - std::optional<Token> Lexer::findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, @@ -3776,12 +3764,16 @@ bool Lexer::LexTokenInternal(Token &Result) { assert(!Result.hasPtrData() && "Result has not been reset"); // CurPtr - Cache BufferPtr in an automatic variable. - const char *CurPtr = SkipHorizontalWhitespace(BufferPtr); + const char *CurPtr = BufferPtr; + + // Small amounts of horizontal whitespace is very common between tokens. + // Check for space character separately to skip the expensive + // isHorizontalWhitespace() check + if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) { + do { + ++CurPtr; + } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)); - /// CurPtr has been advanced forward, indicating that a horizontal whitespace - /// character has been encountered. Check if the Lexer is in keep whitespace - /// mode. - if (CurPtr != BufferPtr) { // If we are keeping whitespace and other tokens, just return what we just // skipped. The next lexer invocation will return the token after the // whitespace. 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 4130e64be855e..c430da67c1469 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1380,34 +1380,13 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective( CurPPLexer->ParsingPreprocessorDirective, true); - bool ParsingFilename = false; - if (Result.getIdentifierInfo()->isImportKeyword()) { - if (getLangOpts().Digraphs && CurLexer && - CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size()) { - // If the import preprocessing token folled by a digraph character '<:', - // the import preprocessing should not traited as a import contextual - // keyword. Eg. - // int - // import <:10 - // :>; - // - // This is a array definition, and equivalent to: - // - // int import[10]; - const char *CurPtr = CurLexer->getBufferLocation(); - CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr); - auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts()); - auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts()); - if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%')) - return false; - } - ParsingFilename = true; - } - // The next token may be an angled string literal after import keyword. - llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename, - ParsingFilename); - std::optional<Token> NextTok = peekNextPPToken(); + llvm::SaveAndRestore<bool> SavedParsingFilemame( + CurPPLexer->ParsingFilename, + Result.getIdentifierInfo()->isImportKeyword()); + + std::optional<Token> NextTok = + CurLexer ? 
CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken(); if (!NextTok) return false; diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp index 0e2fb65390e99..989915004ff57 100644 --- a/clang/test/CXX/module/cpp.pre/p1.cpp +++ b/clang/test/CXX/module/cpp.pre/p1.cpp @@ -38,8 +38,6 @@ // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify -// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify -// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify //--- hash.cpp @@ -207,27 +205,3 @@ export module m; // expected-error {{module directive lines are not allowed on l // expected-error {{module declaration must occur at the start of the translation unit}} \ // expected-note@#1 {{add 'module;'}} #endif - -//--- digraph.cpp -// expected-no-diagnostics -int -import <:10 -:>; - -void foo() { - for (int i = 0; i < 10; ++i) - import[i] = i; -} - -//--- digraph2.cpp -// expected-no-diagnostics -using import = int; - -void bar(int); - -void foo(int val = -import <%%> -) { - bar(val); -} - >From f348770552e584bcc240c8f7136eabfbc1184f93 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Thu, 9 Apr 2026 23:53:22 +0800 Subject: [PATCH 3/7] [C++][Modules] Don't check '<' after 'import' when converting import pp-token to contextual keyword Signed-off-by: yronglin <[email protected]> --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Lex/Preprocessor.cpp | 2 +- clang/lib/Parse/Parser.cpp | 10 ++++++++ clang/test/CXX/module/cpp.pre/p1.cpp | 36 +++++++++++++++++++++++++++- 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 2da7175b51ea3..9c0155265874b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -406,6 +406,7 @@ 
Bug Fixes in This Version - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643) - Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694) - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121) +- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index c430da67c1469..9b21777965ed8 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1394,7 +1394,7 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { LookUpIdentifierInfo(*NextTok); if (Result.getIdentifierInfo()->isImportKeyword()) { - if (NextTok->isOneOf(tok::identifier, tok::less, tok::colon, + if (NextTok->isOneOf(tok::identifier, tok::colon, tok::header_name)) { Result.setKind(tok::kw_import); ModuleImportLoc = Result.getLocation(); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index c4f745612e06c..3e57330e9e09a 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -2495,6 +2495,16 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, break; } + // FIXME: If the previous token is tok::header_name like the following: + // + // import <%%> + // + // The diagnostic location is incorrect. 
+ // + // <source file>:1:10: error: import directive must end with a ';' + // 1 | import <%%> + // | ^ + // | ; bool LexedSemi = false; if (getLangOpts().CPlusPlusModules) LexedSemi = diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp index 989915004ff57..d0cf0ee8efe1a 100644 --- a/clang/test/CXX/module/cpp.pre/p1.cpp +++ b/clang/test/CXX/module/cpp.pre/p1.cpp @@ -38,7 +38,10 @@ // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify - +// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/digraph3.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/digraph4.cpp -fsyntax-only -verify //--- hash.cpp // expected-no-diagnostics @@ -205,3 +208,34 @@ export module m; // expected-error {{module directive lines are not allowed on l // expected-error {{module declaration must occur at the start of the translation unit}} \ // expected-note@#1 {{add 'module;'}} #endif + +//--- digraph.cpp +// expected-no-diagnostics +int +import <:10 +:>; + +void foo() { + for (int i = 0; i < 10; ++i) + import[i] = i; +} + +//--- digraph2.cpp +// expected-no-diagnostics +using import = int; + +void bar(int); + +void foo(int val = +import <% +%> +) { + bar(val); +} + +//--- digraph3.cpp +import <%%>; // expected-error {{'%%' file not found}} + +//--- digraph4.cpp +import <::>; // expected-error {{'::' file not found}} + >From ac0b845d14d053c031f1d162d28ba085154b5fc8 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Sun, 12 Apr 2026 02:45:48 +0800 Subject: [PATCH 4/7] [Clang][Preprocessor] Unify header-name lookahead for import and include Introduce Preprocessor::isNextPPTokenHeaderNameOrOneOf to centralize lookahead logic for 
header-name formation and token classification under ParsingFilename mode. Refactor handling of C++20 module/import contextual keywords and LexHeaderName to use the new helper, ensuring consistent behavior between `import` and `#include`. This fixes incorrect acceptance of cases where macro expansion after a digraph-like `<:` leads to invalid header-name parsing, e.g.: #define FOO foo> #include <:FOO Now such cases are rejected as expected. Also adjusts peekNextPPToken to properly support dependency directive lexers. No functional change intended for valid code; improves correctness and consistency in edge cases involving header-name lexing. Signed-off-by: yronglin <[email protected]> --- clang/docs/ReleaseNotes.rst | 2 +- clang/include/clang/Lex/Preprocessor.h | 23 +++++++++++++- clang/lib/Lex/Lexer.cpp | 18 +++++------ clang/lib/Lex/Preprocessor.cpp | 44 +++++++++++--------------- clang/test/CXX/cpp/cpp.include/p3.cpp | 5 +++ clang/test/CXX/module/cpp.pre/p1.cpp | 33 +++++++++++++------ 6 files changed, 79 insertions(+), 46 deletions(-) create mode 100644 clang/test/CXX/cpp/cpp.include/p3.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9c0155265874b..dc246af32a70c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -406,7 +406,7 @@ Bug Fixes in This Version - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643) - Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694) - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121) -- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693) +- Fixed incorrect handling of header-name lookahead in C++ import and #include directives involving digraphs and macro expansion. 
(#GH190693) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index c7e152a75f51f..bb34f00360041 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -48,6 +48,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Registry.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/TrailingObjects.h" #include <cassert> #include <cstddef> @@ -1842,6 +1843,26 @@ class Preprocessor { void HandleCXXImportDirective(Token Import); void HandleCXXModuleDirective(Token Module); + template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) { + // First, tries to form a valid header-name token. + llvm::SaveAndRestore<bool> SavedFilename(CurPPLexer->ParsingFilename, + true); + if (auto Tok = peekNextPPToken()) { + if (Tok->is(tok::header_name)) + return true; + } + + // If that fails and it's not one of the other tokens, then it's not a + // directive. + CurPPLexer->ParsingFilename = false; + if (auto NextTok = peekNextPPToken()) { + if (NextTok->is(tok::raw_identifier)) + LookUpIdentifierInfo(*NextTok); + return NextTok->isOneOf(Ks...); + } + return false; + } + /// Callback invoked when the lexer sees one of export, import or module token /// at the start of a line. /// @@ -2393,12 +2414,12 @@ class Preprocessor { return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false; } -private: /// peekNextPPToken - Return std::nullopt if there are no more tokens in the /// buffer controlled by this lexer, otherwise return the next unexpanded /// token. std::optional<Token> peekNextPPToken() const; +private: /// Identifiers used for SEH handling in Borland. 
These are only /// allowed in particular circumstances // __except block diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 10246552bb13d..2982788c7de23 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3219,15 +3219,6 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { std::optional<Token> Lexer::peekNextPPToken() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); - if (isDependencyDirectivesLexer()) { - if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) - return std::nullopt; - Token Result; - (void)convertDependencyDirectiveToken( - DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result); - return Result; - } - // Switch to 'skipping' mode. This will ensure that we can lex a token // without emitting diagnostics, disables macro expansion, and will cause EOF // to return an EOF token instead of popping the include stack. @@ -3242,7 +3233,14 @@ std::optional<Token> Lexer::peekNextPPToken() { MultipleIncludeOpt MIOptState = MIOpt; Token Tok; - Lex(Tok); + if (isDependencyDirectivesLexer()) { + if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) + return std::nullopt; + (void)convertDependencyDirectiveToken( + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Tok); + } else { + Lex(Tok); + } // Restore state that may have changed. 
BufferPtr = TmpBufferPtr; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 9b21777965ed8..ad7f9683dacaf 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1098,8 +1098,11 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { // __has_include(__has_include)) if (CurPPLexer->ParsingFilename) LexUnexpandedToken(FilenameTok); - else + else if ((getLangOpts().CPlusPlusModules && isImportingCXXNamedModules()) || + isNextPPTokenHeaderNameOrOneOf(tok::less)) CurPPLexer->LexIncludeFilename(FilenameTok); + else + Lex(FilenameTok); } else { Lex(FilenameTok); } @@ -1380,33 +1383,24 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) { llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective( CurPPLexer->ParsingPreprocessorDirective, true); - // The next token may be an angled string literal after import keyword. - llvm::SaveAndRestore<bool> SavedParsingFilemame( - CurPPLexer->ParsingFilename, - Result.getIdentifierInfo()->isImportKeyword()); - - std::optional<Token> NextTok = - CurLexer ? 
CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken(); - if (!NextTok) - return false; - - if (NextTok->is(tok::raw_identifier)) - LookUpIdentifierInfo(*NextTok); - - if (Result.getIdentifierInfo()->isImportKeyword()) { - if (NextTok->isOneOf(tok::identifier, tok::colon, - tok::header_name)) { - Result.setKind(tok::kw_import); - ModuleImportLoc = Result.getLocation(); - IsAtImport = false; - return true; + if (II->isModuleKeyword()) { + if (auto NextTok = peekNextPPToken()) { + if (NextTok->is(tok::raw_identifier)) + LookUpIdentifierInfo(*NextTok); + if (NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) { + Result.setKind(tok::kw_module); + ModuleDeclLoc = Result.getLocation(); + return true; + } } + return false; } - if (Result.getIdentifierInfo()->isModuleKeyword() && - NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) { - Result.setKind(tok::kw_module); - ModuleDeclLoc = Result.getLocation(); + if (II->isImportKeyword() && + isNextPPTokenHeaderNameOrOneOf(tok::identifier, tok::colon, tok::less)) { + Result.setKind(tok::kw_import); + ModuleImportLoc = Result.getLocation(); + IsAtImport = false; return true; } diff --git a/clang/test/CXX/cpp/cpp.include/p3.cpp b/clang/test/CXX/cpp/cpp.include/p3.cpp new file mode 100644 index 0000000000000..7afb4af1c9423 --- /dev/null +++ b/clang/test/CXX/cpp/cpp.include/p3.cpp @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 %s -fsyntax-only -verify + +#define FOO foo> +#include <:FOO +// expected-error@-1 {{expected "FILENAME" or <FILENAME>}} diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp index d0cf0ee8efe1a..5b6f225f2f58c 100644 --- a/clang/test/CXX/module/cpp.pre/p1.cpp +++ b/clang/test/CXX/module/cpp.pre/p1.cpp @@ -38,11 +38,12 @@ // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only 
-verify -// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify -// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify -// RUN: %clang_cc1 -std=c++20 %t/digraph3.cpp -fsyntax-only -verify -// RUN: %clang_cc1 -std=c++20 %t/digraph4.cpp -fsyntax-only -verify - +// RUN: %clang_cc1 -std=c++20 %t/header_name1.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/header_name2.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/header_name3.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/header_name4.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/header_name5.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/header_name6.cpp -fsyntax-only -verify //--- hash.cpp // expected-no-diagnostics # // preprocessing directive @@ -209,7 +210,7 @@ export module m; // expected-error {{module directive lines are not allowed on l // expected-note@#1 {{add 'module;'}} #endif -//--- digraph.cpp +//--- header_name1.cpp // expected-no-diagnostics int import <:10 @@ -220,7 +221,7 @@ void foo() { import[i] = i; } -//--- digraph2.cpp +//--- header_name2.cpp // expected-no-diagnostics using import = int; @@ -233,9 +234,23 @@ import <% bar(val); } -//--- digraph3.cpp +//--- header_name3.cpp +export module M; import <%%>; // expected-error {{'%%' file not found}} -//--- digraph4.cpp +//--- header_name4.cpp +export module M; import <::>; // expected-error {{'::' file not found}} +//--- header_name5.cpp +export module M; +#define FOO foo>; +import <:FOO +// expected-error@-1 {{use of undeclared identifier 'foo'}} +// expected-error@-2 {{a type specifier is required for all declarations}} +// expected-error@-3 {{expected expression}} + +//--- header_name6.cpp +export module M; +#define HEADER vector> +import <HEADER; // expected-error {{file not found}} >From 54449521a68dc686210d9dac49f7f87c8155cc57 Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Sun, 12 Apr 2026 02:56:10 +0800 Subject: [PATCH 5/7] Format 
Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Preprocessor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index bb34f00360041..34bf1c2f0fec7 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1845,8 +1845,8 @@ class Preprocessor { template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) { // First, tries to form a valid header-name token. - llvm::SaveAndRestore<bool> SavedFilename(CurPPLexer->ParsingFilename, - true); + llvm::SaveAndRestore<bool> SavedParsingFilename(CurPPLexer->ParsingFilename, + true); if (auto Tok = peekNextPPToken()) { if (Tok->is(tok::header_name)) return true; >From 3500dda26d523f9a28215d447a99353864c4bd3e Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Mon, 13 Apr 2026 01:09:45 +0800 Subject: [PATCH 6/7] Avoid 2nd peekNextPPToken call Signed-off-by: yronglin <[email protected]> --- clang/include/clang/Lex/Preprocessor.h | 75 ++++++++++++++++++-------- 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 34bf1c2f0fec7..ec29209fc1836 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1843,26 +1843,6 @@ class Preprocessor { void HandleCXXImportDirective(Token Import); void HandleCXXModuleDirective(Token Module); - template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) { - // First, tries to form a valid header-name token. - llvm::SaveAndRestore<bool> SavedParsingFilename(CurPPLexer->ParsingFilename, - true); - if (auto Tok = peekNextPPToken()) { - if (Tok->is(tok::header_name)) - return true; - } - - // If that fails and it's not one of the other tokens, then it's not a - // directive. 
- CurPPLexer->ParsingFilename = false; - if (auto NextTok = peekNextPPToken()) { - if (NextTok->is(tok::raw_identifier)) - LookUpIdentifierInfo(*NextTok); - return NextTok->isOneOf(Ks...); - } - return false; - } - /// Callback invoked when the lexer sees one of export, import or module token /// at the start of a line. /// @@ -2414,12 +2394,65 @@ class Preprocessor { return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false; } +private: /// peekNextPPToken - Return std::nullopt if there are no more tokens in the /// buffer controlled by this lexer, otherwise return the next unexpanded /// token. std::optional<Token> peekNextPPToken() const; -private: + /// Check whether the next preprocessing token can form a header-name token + /// or matches one of the specified token kinds. + /// + /// This performs a lookahead without consuming tokens: + /// - First, it temporarily enables `ParsingFilename` to attempt forming a + /// `tok::header_name` (e.g. `<foo>` or "foo"). + /// - If that succeeds, returns true. + /// - Otherwise, it restores normal lexing mode and checks whether the next + /// token matches any of the provided kinds `Ks...`. + /// + /// This helper is used to classify tokens in contexts such as C++20 `import` + /// and `#include`, ensuring consistent handling of header-name lexing and + /// avoiding unintended lexer state changes. + template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) { + // First, tries to form a valid header-name token. + llvm::SaveAndRestore<bool> SavedParsingFilename(CurPPLexer->ParsingFilename, + true); + if (auto NextTok = peekNextPPToken()) { + if (NextTok->is(tok::header_name)) + return true; + + // In ParsingFilename mode, both <...> and "..." are lexed as header-name + // tokens. If a valid header-name is formed, return immediately. + // + // Otherwise, we may need to re-lex the token in normal mode. 
This is + // required for '<' to correctly handle cases such as digraphs ('<:', + // '<%') and situations where macro expansion affects token boundaries, + // e.g.: + // + // #define VECTOR vector> + // #include <VECTOR + // + // In such cases, the initial lex in ParsingFilename mode may fail to form + // a header-name, and only normal lexing yields the correct tokenization. + // For all other tokens, the result is identical between the two modes, so + // we can classify them directly and avoid calling the relatively + // expensive second peekNextPPToken() on the common path. + if (NextTok->isNot(tok::less)) { + if (NextTok->is(tok::raw_identifier)) + LookUpIdentifierInfo(*NextTok); + return NextTok->isOneOf(Ks...); + } + } + + // Either nothing could be peeked or a '<' did not form a header-name: + // re-lex in normal mode and classify the resulting token. + CurPPLexer->ParsingFilename = false; + if (auto NextTok = peekNextPPToken()) { + return NextTok->isOneOf(Ks...); + } + return false; + } + /// Identifiers used for SEH handling in Borland.
These are only /// allowed in particular circumstances // __except block >From 536afcf6703887461fe4160e6e671969e0747bbe Mon Sep 17 00:00:00 2001 From: yronglin <[email protected]> Date: Mon, 13 Apr 2026 01:20:04 +0800 Subject: [PATCH 7/7] Add comments in LexHeaderName Signed-off-by: yronglin <[email protected]> --- clang/lib/Lex/Preprocessor.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index ad7f9683dacaf..b74dd59bdb021 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1098,7 +1098,11 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { // __has_include(__has_include)) if (CurPPLexer->ParsingFilename) LexUnexpandedToken(FilenameTok); - else if ((getLangOpts().CPlusPlusModules && isImportingCXXNamedModules()) || + else if ((getLangOpts().CPlusPlusModules && + isImportingCXXNamedModules()) || // C++ import already checked in + // HandleModuleContextualKeyword, + // avoid duplicate check in + // LexHeaderName. isNextPPTokenHeaderNameOrOneOf(tok::less)) CurPPLexer->LexIncludeFilename(FilenameTok); else _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
