https://github.com/yronglin updated 
https://github.com/llvm/llvm-project/pull/191004

>From 373880aed203efd8521dfb76a3f52fedee2592dc Mon Sep 17 00:00:00 2001
From: yronglin <[email protected]>
Date: Thu, 9 Apr 2026 00:19:55 +0800
Subject: [PATCH 1/4] [C++][Modules][Preprocessor] Clang should not convert a
 import preprocessing token to contextual keyword if a digraph character
 following import

Signed-off-by: yronglin <[email protected]>
---
 clang/docs/ReleaseNotes.rst          |  1 +
 clang/include/clang/Lex/Lexer.h      |  4 ++++
 clang/lib/Lex/Lexer.cpp              | 26 ++++++++++++++--------
 clang/lib/Lex/Preprocessor.cpp       | 33 +++++++++++++++++++++++-----
 clang/test/CXX/module/cpp.pre/p1.cpp | 26 ++++++++++++++++++++++
 5 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2da7175b51ea3..9c0155265874b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,6 +406,7 @@ Bug Fixes in This Version
 - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect 
array filler lowering. (#GH189643)
 - Fixed the behavior in C23 of ``auto``, by emitting an error when an array 
type is specified for a ``char *``. (#GH162694)
 - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers 
in C23. (#GH164121)
+- Fixed incorrect handling of C++ import preprocessing token when a digraph 
character after import. (#GH190693)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 0459a863bc08d..8e4cc7a95b327 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -732,6 +732,10 @@ class Lexer : public PreprocessorLexer {
   /// otherwise return P.
   static const char *SkipEscapedNewLines(const char *P);
 
+  /// SkipHorizontalWhitespace - Skip the horizontak whitespace characters and
+  /// returns the advanced pointer.
+  static const char *SkipHorizontalWhitespace(const char *Ptr);
+
   /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
   /// diagnostic.
   static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 10246552bb13d..29caeb943e3df 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1336,6 +1336,18 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
   }
 }
 
+const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) {
+  // Small amounts of horizontal whitespace is very common between tokens.
+  // Check for space character separately to skip the expensive
+  // isHorizontalWhitespace() check
+  if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) {
+    do {
+      ++Ptr;
+    } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr));
+  }
+  return Ptr;
+}
+
 std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
                                           const SourceManager &SM,
                                           const LangOptions &LangOpts,
@@ -3764,16 +3776,12 @@ bool Lexer::LexTokenInternal(Token &Result) {
   assert(!Result.hasPtrData() && "Result has not been reset");
 
   // CurPtr - Cache BufferPtr in an automatic variable.
-  const char *CurPtr = BufferPtr;
-
-  // Small amounts of horizontal whitespace is very common between tokens.
-  // Check for space character separately to skip the expensive
-  // isHorizontalWhitespace() check
-  if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) {
-    do {
-      ++CurPtr;
-    } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr));
+  const char *CurPtr = SkipHorizontalWhitespace(BufferPtr);
 
+  /// CurPtr has been advanced forward, indicating that a horizontal whitespace
+  /// character has been encountered. Check if the Lexer is in keep whitespace
+  /// mode.
+  if (CurPtr != BufferPtr) {
     // If we are keeping whitespace and other tokens, just return what we just
     // skipped.  The next lexer invocation will return the token after the
     // whitespace.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c430da67c1469..4130e64be855e 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1380,13 +1380,34 @@ bool Preprocessor::HandleModuleContextualKeyword(Token 
&Result) {
   llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
       CurPPLexer->ParsingPreprocessorDirective, true);
 
-  // The next token may be an angled string literal after import keyword.
-  llvm::SaveAndRestore<bool> SavedParsingFilemame(
-      CurPPLexer->ParsingFilename,
-      Result.getIdentifierInfo()->isImportKeyword());
+  bool ParsingFilename = false;
+  if (Result.getIdentifierInfo()->isImportKeyword()) {
+    if (getLangOpts().Digraphs && CurLexer &&
+        CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size()) 
{
+      // If the import preprocessing token folled by a digraph character '<:',
+      // the import preprocessing should not traited as a import contextual
+      // keyword. Eg.
+      //    int
+      //    import <:10
+      //    :>;
+      //
+      // This is a array definition, and equivalent to:
+      //
+      //    int import[10];
+      const char *CurPtr = CurLexer->getBufferLocation();
+      CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr);
+      auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts());
+      auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts());
+      if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%'))
+        return false;
+    }
+    ParsingFilename = true;
+  }
 
-  std::optional<Token> NextTok =
-      CurLexer ? CurLexer->peekNextPPToken() : 
CurTokenLexer->peekNextPPToken();
+  // The next token may be an angled string literal after import keyword.
+  llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename,
+                                                  ParsingFilename);
+  std::optional<Token> NextTok = peekNextPPToken();
   if (!NextTok)
     return false;
 
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp 
b/clang/test/CXX/module/cpp.pre/p1.cpp
index 989915004ff57..0e2fb65390e99 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,6 +38,8 @@
 // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only 
-verify
 // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' 
-fsyntax-only -verify
 // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
 
 
 //--- hash.cpp
@@ -205,3 +207,27 @@ export module m; // expected-error {{module directive 
lines are not allowed on l
                  // expected-error {{module declaration must occur at the 
start of the translation unit}} \
                  // expected-note@#1 {{add 'module;'}}
 #endif
+
+//--- digraph.cpp
+// expected-no-diagnostics
+int
+import <:10
+:>;
+
+void foo() {
+    for (int i = 0; i < 10; ++i)
+        import[i] = i;
+}
+
+//--- digraph2.cpp
+// expected-no-diagnostics
+using import = int;
+
+void bar(int);
+
+void foo(int val =
+import <%%>
+) {
+   bar(val);
+}
+

>From 0c0d98300b68c2237876aef08150b0f160b08470 Mon Sep 17 00:00:00 2001
From: yronglin <[email protected]>
Date: Thu, 9 Apr 2026 21:54:18 +0800
Subject: [PATCH 2/4] Revert "[C++][Modules][Preprocessor] Clang should not
 convert a import preprocessing token to contextual keyword if a digraph
 character following import"

This reverts commit 373880aed203efd8521dfb76a3f52fedee2592dc.
---
 clang/docs/ReleaseNotes.rst          |  1 -
 clang/include/clang/Lex/Lexer.h      |  4 ----
 clang/lib/Lex/Lexer.cpp              | 26 ++++++++--------------
 clang/lib/Lex/Preprocessor.cpp       | 33 +++++-----------------------
 clang/test/CXX/module/cpp.pre/p1.cpp | 26 ----------------------
 5 files changed, 15 insertions(+), 75 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9c0155265874b..2da7175b51ea3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,7 +406,6 @@ Bug Fixes in This Version
 - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect 
array filler lowering. (#GH189643)
 - Fixed the behavior in C23 of ``auto``, by emitting an error when an array 
type is specified for a ``char *``. (#GH162694)
 - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers 
in C23. (#GH164121)
-- Fixed incorrect handling of C++ import preprocessing token when a digraph 
character after import. (#GH190693)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 8e4cc7a95b327..0459a863bc08d 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -732,10 +732,6 @@ class Lexer : public PreprocessorLexer {
   /// otherwise return P.
   static const char *SkipEscapedNewLines(const char *P);
 
-  /// SkipHorizontalWhitespace - Skip the horizontak whitespace characters and
-  /// returns the advanced pointer.
-  static const char *SkipHorizontalWhitespace(const char *Ptr);
-
   /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
   /// diagnostic.
   static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 29caeb943e3df..10246552bb13d 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1336,18 +1336,6 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
   }
 }
 
-const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) {
-  // Small amounts of horizontal whitespace is very common between tokens.
-  // Check for space character separately to skip the expensive
-  // isHorizontalWhitespace() check
-  if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) {
-    do {
-      ++Ptr;
-    } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr));
-  }
-  return Ptr;
-}
-
 std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
                                           const SourceManager &SM,
                                           const LangOptions &LangOpts,
@@ -3776,12 +3764,16 @@ bool Lexer::LexTokenInternal(Token &Result) {
   assert(!Result.hasPtrData() && "Result has not been reset");
 
   // CurPtr - Cache BufferPtr in an automatic variable.
-  const char *CurPtr = SkipHorizontalWhitespace(BufferPtr);
+  const char *CurPtr = BufferPtr;
+
+  // Small amounts of horizontal whitespace is very common between tokens.
+  // Check for space character separately to skip the expensive
+  // isHorizontalWhitespace() check
+  if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) {
+    do {
+      ++CurPtr;
+    } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr));
 
-  /// CurPtr has been advanced forward, indicating that a horizontal whitespace
-  /// character has been encountered. Check if the Lexer is in keep whitespace
-  /// mode.
-  if (CurPtr != BufferPtr) {
     // If we are keeping whitespace and other tokens, just return what we just
     // skipped.  The next lexer invocation will return the token after the
     // whitespace.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 4130e64be855e..c430da67c1469 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1380,34 +1380,13 @@ bool Preprocessor::HandleModuleContextualKeyword(Token 
&Result) {
   llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
       CurPPLexer->ParsingPreprocessorDirective, true);
 
-  bool ParsingFilename = false;
-  if (Result.getIdentifierInfo()->isImportKeyword()) {
-    if (getLangOpts().Digraphs && CurLexer &&
-        CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size()) 
{
-      // If the import preprocessing token folled by a digraph character '<:',
-      // the import preprocessing should not traited as a import contextual
-      // keyword. Eg.
-      //    int
-      //    import <:10
-      //    :>;
-      //
-      // This is a array definition, and equivalent to:
-      //
-      //    int import[10];
-      const char *CurPtr = CurLexer->getBufferLocation();
-      CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr);
-      auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts());
-      auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts());
-      if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%'))
-        return false;
-    }
-    ParsingFilename = true;
-  }
-
   // The next token may be an angled string literal after import keyword.
-  llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename,
-                                                  ParsingFilename);
-  std::optional<Token> NextTok = peekNextPPToken();
+  llvm::SaveAndRestore<bool> SavedParsingFilemame(
+      CurPPLexer->ParsingFilename,
+      Result.getIdentifierInfo()->isImportKeyword());
+
+  std::optional<Token> NextTok =
+      CurLexer ? CurLexer->peekNextPPToken() : 
CurTokenLexer->peekNextPPToken();
   if (!NextTok)
     return false;
 
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp 
b/clang/test/CXX/module/cpp.pre/p1.cpp
index 0e2fb65390e99..989915004ff57 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,8 +38,6 @@
 // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only 
-verify
 // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' 
-fsyntax-only -verify
 // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
 
 
 //--- hash.cpp
@@ -207,27 +205,3 @@ export module m; // expected-error {{module directive 
lines are not allowed on l
                  // expected-error {{module declaration must occur at the 
start of the translation unit}} \
                  // expected-note@#1 {{add 'module;'}}
 #endif
-
-//--- digraph.cpp
-// expected-no-diagnostics
-int
-import <:10
-:>;
-
-void foo() {
-    for (int i = 0; i < 10; ++i)
-        import[i] = i;
-}
-
-//--- digraph2.cpp
-// expected-no-diagnostics
-using import = int;
-
-void bar(int);
-
-void foo(int val =
-import <%%>
-) {
-   bar(val);
-}
-

>From f348770552e584bcc240c8f7136eabfbc1184f93 Mon Sep 17 00:00:00 2001
From: yronglin <[email protected]>
Date: Thu, 9 Apr 2026 23:53:22 +0800
Subject: [PATCH 3/4] [C++][Modules] Don't check '<' after 'import' when
 converting import pp-token to contextual keyword

Signed-off-by: yronglin <[email protected]>
---
 clang/docs/ReleaseNotes.rst          |  1 +
 clang/lib/Lex/Preprocessor.cpp       |  2 +-
 clang/lib/Parse/Parser.cpp           | 10 ++++++++
 clang/test/CXX/module/cpp.pre/p1.cpp | 36 +++++++++++++++++++++++++++-
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2da7175b51ea3..9c0155265874b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,6 +406,7 @@ Bug Fixes in This Version
 - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect 
array filler lowering. (#GH189643)
 - Fixed the behavior in C23 of ``auto``, by emitting an error when an array 
type is specified for a ``char *``. (#GH162694)
 - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers 
in C23. (#GH164121)
+- Fixed incorrect handling of C++ import preprocessing token when a digraph 
character after import. (#GH190693)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c430da67c1469..9b21777965ed8 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1394,7 +1394,7 @@ bool Preprocessor::HandleModuleContextualKeyword(Token 
&Result) {
     LookUpIdentifierInfo(*NextTok);
 
   if (Result.getIdentifierInfo()->isImportKeyword()) {
-    if (NextTok->isOneOf(tok::identifier, tok::less, tok::colon,
+    if (NextTok->isOneOf(tok::identifier, tok::colon,
                          tok::header_name)) {
       Result.setKind(tok::kw_import);
       ModuleImportLoc = Result.getLocation();
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index c4f745612e06c..3e57330e9e09a 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -2495,6 +2495,16 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
     break;
   }
 
+  // FIXME: If the token before the missing ';' is a tok::header_name, e.g.
+  //
+  //  import <%%>
+  //
+  // the diagnostic points into the header-name instead of after it:
+  //
+  //  <source file>:1:10: error: import directive must end with a ';'
+  //   1 | import <%%>
+  //     |          ^
+  //     |          ;
   bool LexedSemi = false;
   if (getLangOpts().CPlusPlusModules)
     LexedSemi =
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp 
b/clang/test/CXX/module/cpp.pre/p1.cpp
index 989915004ff57..d0cf0ee8efe1a 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,7 +38,10 @@
 // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only 
-verify
 // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' 
-fsyntax-only -verify
 // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
-
+// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph3.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph4.cpp -fsyntax-only -verify
 
 //--- hash.cpp
 // expected-no-diagnostics
@@ -205,3 +208,34 @@ export module m; // expected-error {{module directive 
lines are not allowed on l
                  // expected-error {{module declaration must occur at the 
start of the translation unit}} \
                  // expected-note@#1 {{add 'module;'}}
 #endif
+
+//--- digraph.cpp
+// expected-no-diagnostics
+int
+import <:10
+:>;
+
+void foo() {
+    for (int i = 0; i < 10; ++i)
+        import[i] = i;
+}
+
+//--- digraph2.cpp
+// expected-no-diagnostics
+using import = int;
+
+void bar(int);
+
+void foo(int val =
+import <%
+%>
+) {
+   bar(val);
+}
+
+//--- digraph3.cpp
+import <%%>; // expected-error {{'%%' file not found}}
+
+//--- digraph4.cpp
+import <::>; // expected-error {{'::' file not found}}
+

>From ac0b845d14d053c031f1d162d28ba085154b5fc8 Mon Sep 17 00:00:00 2001
From: yronglin <[email protected]>
Date: Sun, 12 Apr 2026 02:45:48 +0800
Subject: [PATCH 4/4] [Clang][Preprocessor] Unify header-name lookahead for
 import and include

Introduce Preprocessor::isNextPPTokenHeaderNameOrOneOf to centralize
lookahead logic for header-name formation and token classification
under ParsingFilename mode.

Refactor handling of C++20 module/import contextual keywords and
LexHeaderName to use the new helper, ensuring consistent behavior
between `import` and `#include`.

This fixes incorrect acceptance of cases where macro expansion after
a digraph-like `<:` leads to invalid header-name parsing, e.g.:

  #define FOO foo>
  #include <:FOO

Now such cases are rejected as expected.

Also adjusts peekNextPPToken to properly support dependency directive
lexers.

No functional change intended for valid code; improves correctness and
consistency in edge cases involving header-name lexing.

Signed-off-by: yronglin <[email protected]>
---
 clang/docs/ReleaseNotes.rst            |  2 +-
 clang/include/clang/Lex/Preprocessor.h | 23 +++++++++++++-
 clang/lib/Lex/Lexer.cpp                | 18 +++++------
 clang/lib/Lex/Preprocessor.cpp         | 44 +++++++++++---------------
 clang/test/CXX/cpp/cpp.include/p3.cpp  |  5 +++
 clang/test/CXX/module/cpp.pre/p1.cpp   | 33 +++++++++++++------
 6 files changed, 79 insertions(+), 46 deletions(-)
 create mode 100644 clang/test/CXX/cpp/cpp.include/p3.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9c0155265874b..dc246af32a70c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,7 +406,7 @@ Bug Fixes in This Version
 - Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect 
array filler lowering. (#GH189643)
 - Fixed the behavior in C23 of ``auto``, by emitting an error when an array 
type is specified for a ``char *``. (#GH162694)
 - Fixed incorrect rejection of ``auto`` with reordered declaration specifiers 
in C23. (#GH164121)
-- Fixed incorrect handling of C++ import preprocessing token when a digraph 
character after import. (#GH190693)
+- Fixed incorrect handling of header-name lookahead in C++ ``import`` and ``#include`` 
directives involving digraphs and macro expansion. (#GH190693)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index c7e152a75f51f..bb34f00360041 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -48,6 +48,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Registry.h"
+#include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/TrailingObjects.h"
 #include <cassert>
 #include <cstddef>
@@ -1842,6 +1843,26 @@ class Preprocessor {
   void HandleCXXImportDirective(Token Import);
   void HandleCXXModuleDirective(Token Module);
 
+  template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) {
+    // First, try to form a valid header-name token.
+    llvm::SaveAndRestore<bool> SavedFilename(CurPPLexer->ParsingFilename,
+                                              true);
+    if (auto Tok = peekNextPPToken()) {
+      if (Tok->is(tok::header_name))
+        return true;
+    }
+
+    // If no header-name could be formed, peek again with filename lexing
+    // disabled; unless that token is one of Ks, this is not a directive.
+    CurPPLexer->ParsingFilename = false;
+    if (auto NextTok = peekNextPPToken()) {
+      if (NextTok->is(tok::raw_identifier))
+        LookUpIdentifierInfo(*NextTok);
+      return NextTok->isOneOf(Ks...);
+    }
+    return false;
+  }
+
   /// Callback invoked when the lexer sees one of export, import or module 
token
   /// at the start of a line.
   ///
@@ -2393,12 +2414,12 @@ class Preprocessor {
     return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false;
   }
 
-private:
   /// peekNextPPToken - Return std::nullopt if there are no more tokens in the
   /// buffer controlled by this lexer, otherwise return the next unexpanded
   /// token.
   std::optional<Token> peekNextPPToken() const;
 
+private:
   /// Identifiers used for SEH handling in Borland. These are only
   /// allowed in particular circumstances
   // __except block
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 10246552bb13d..2982788c7de23 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3219,15 +3219,6 @@ bool Lexer::LexEndOfFile(Token &Result, const char 
*CurPtr) {
 std::optional<Token> Lexer::peekNextPPToken() {
   assert(!LexingRawMode && "How can we expand a macro from a skipping 
buffer?");
 
-  if (isDependencyDirectivesLexer()) {
-    if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
-      return std::nullopt;
-    Token Result;
-    (void)convertDependencyDirectiveToken(
-        DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result);
-    return Result;
-  }
-
   // Switch to 'skipping' mode.  This will ensure that we can lex a token
   // without emitting diagnostics, disables macro expansion, and will cause EOF
   // to return an EOF token instead of popping the include stack.
@@ -3242,7 +3233,14 @@ std::optional<Token> Lexer::peekNextPPToken() {
   MultipleIncludeOpt MIOptState = MIOpt;
 
   Token Tok;
-  Lex(Tok);
+  if (isDependencyDirectivesLexer()) {
+    if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
+      return std::nullopt;
+    (void)convertDependencyDirectiveToken(
+        DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Tok);
+  } else {
+    Lex(Tok);
+  }
 
   // Restore state that may have changed.
   BufferPtr = TmpBufferPtr;
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 9b21777965ed8..ad7f9683dacaf 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1098,8 +1098,11 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, 
bool AllowMacroExpansion) {
     // __has_include(__has_include))
     if (CurPPLexer->ParsingFilename)
       LexUnexpandedToken(FilenameTok);
-    else
+    else if ((getLangOpts().CPlusPlusModules && isImportingCXXNamedModules()) 
||
+             isNextPPTokenHeaderNameOrOneOf(tok::less))
       CurPPLexer->LexIncludeFilename(FilenameTok);
+    else
+      Lex(FilenameTok);
   } else {
     Lex(FilenameTok);
   }
@@ -1380,33 +1383,24 @@ bool Preprocessor::HandleModuleContextualKeyword(Token 
&Result) {
   llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
       CurPPLexer->ParsingPreprocessorDirective, true);
 
-  // The next token may be an angled string literal after import keyword.
-  llvm::SaveAndRestore<bool> SavedParsingFilemame(
-      CurPPLexer->ParsingFilename,
-      Result.getIdentifierInfo()->isImportKeyword());
-
-  std::optional<Token> NextTok =
-      CurLexer ? CurLexer->peekNextPPToken() : 
CurTokenLexer->peekNextPPToken();
-  if (!NextTok)
-    return false;
-
-  if (NextTok->is(tok::raw_identifier))
-    LookUpIdentifierInfo(*NextTok);
-
-  if (Result.getIdentifierInfo()->isImportKeyword()) {
-    if (NextTok->isOneOf(tok::identifier, tok::colon,
-                         tok::header_name)) {
-      Result.setKind(tok::kw_import);
-      ModuleImportLoc = Result.getLocation();
-      IsAtImport = false;
-      return true;
+  if (II->isModuleKeyword()) {
+    if (auto NextTok = peekNextPPToken()) {
+      if (NextTok->is(tok::raw_identifier))
+        LookUpIdentifierInfo(*NextTok);
+      if (NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) {
+        Result.setKind(tok::kw_module);
+        ModuleDeclLoc = Result.getLocation();
+        return true;
+      }
     }
+    return false;
   }
 
-  if (Result.getIdentifierInfo()->isModuleKeyword() &&
-      NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) {
-    Result.setKind(tok::kw_module);
-    ModuleDeclLoc = Result.getLocation();
+  if (II->isImportKeyword() &&
+      isNextPPTokenHeaderNameOrOneOf(tok::identifier, tok::colon, tok::less)) {
+    Result.setKind(tok::kw_import);
+    ModuleImportLoc = Result.getLocation();
+    IsAtImport = false;
     return true;
   }
 
diff --git a/clang/test/CXX/cpp/cpp.include/p3.cpp 
b/clang/test/CXX/cpp/cpp.include/p3.cpp
new file mode 100644
index 0000000000000..7afb4af1c9423
--- /dev/null
+++ b/clang/test/CXX/cpp/cpp.include/p3.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 %s -fsyntax-only -verify
+
+#define FOO foo>
+#include <:FOO
+// expected-error@-1 {{expected "FILENAME" or <FILENAME>}}
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp 
b/clang/test/CXX/module/cpp.pre/p1.cpp
index d0cf0ee8efe1a..5b6f225f2f58c 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,11 +38,12 @@
 // RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only 
-verify
 // RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' 
-fsyntax-only -verify
 // RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph3.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph4.cpp -fsyntax-only -verify
-
+// RUN: %clang_cc1 -std=c++20 %t/header_name1.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name2.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name3.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name4.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name5.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name6.cpp -fsyntax-only -verify
 //--- hash.cpp
 // expected-no-diagnostics
 #                       // preprocessing directive
@@ -209,7 +210,7 @@ export module m; // expected-error {{module directive lines 
are not allowed on l
                  // expected-note@#1 {{add 'module;'}}
 #endif
 
-//--- digraph.cpp
+//--- header_name1.cpp
 // expected-no-diagnostics
 int
 import <:10
@@ -220,7 +221,7 @@ void foo() {
         import[i] = i;
 }
 
-//--- digraph2.cpp
+//--- header_name2.cpp
 // expected-no-diagnostics
 using import = int;
 
@@ -233,9 +234,23 @@ import <%
    bar(val);
 }
 
-//--- digraph3.cpp
+//--- header_name3.cpp
+export module M;
 import <%%>; // expected-error {{'%%' file not found}}
 
-//--- digraph4.cpp
+//--- header_name4.cpp
+export module M;
 import <::>; // expected-error {{'::' file not found}}
 
+//--- header_name5.cpp
+export module M;
+#define FOO foo>;
+import <:FOO
+// expected-error@-1 {{use of undeclared identifier 'foo'}}
+// expected-error@-2 {{a type specifier is required for all declarations}}
+// expected-error@-3 {{expected expression}}
+
+//--- header_name6.cpp
+export module M;
+#define HEADER vector>
+import <HEADER; // expected-error {{file not found}}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to