https://github.com/jansvoboda11 created 
https://github.com/llvm/llvm-project/pull/135813

This PR introduces a new single-module preprocessing mode. It is very similar to
the single-file-parse mode, but has the following differences:
* Single-file mode skips over all inclusion directives, while single-module
mode skips over import directives only and still resolves textual inclusion
directives.
* Single-file mode enters all branches of a conditional directive with an
undefined identifier, while single-module mode enters none of them.

This will be used by the dependency scanner to quickly discover a subset of the
modular dependencies of a TU/module. The dependencies are not imported in
this mode, but the file-inclusion preprocessor callback is still invoked.

>From 9f16dedd19b7dda4f751375ef57fd18fc0e86b33 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svob...@apple.com>
Date: Wed, 9 Apr 2025 13:11:38 -0700
Subject: [PATCH] [clang][lex] Introduce new single-module-parse mode

---
 clang/include/clang/Driver/Options.td         |  4 +
 clang/include/clang/Lex/PreprocessorOptions.h |  7 ++
 clang/lib/Lex/PPDirectives.cpp                | 28 +++++-
 clang/lib/Lex/PPExpressions.cpp               | 12 ++-
 clang/test/Modules/single-module-parse-mode.c | 92 +++++++++++++++++++
 5 files changed, 134 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/Modules/single-module-parse-mode.c

diff --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index c9d2bc5e81976..5afac8f752d04 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3247,6 +3247,10 @@ def fno_modules_prune_non_affecting_module_map_files :
     Group<f_Group>, Flags<[]>, Visibility<[CC1Option]>,
     
MarshallingInfoNegativeFlag<HeaderSearchOpts<"ModulesPruneNonAffectingModuleMaps">>,
     HelpText<"Do not prune non-affecting module map files when writing module 
files">;
+def fmodules_single_module_parse_mode :
+  Flag<["-"], "fmodules-single-module-parse-mode">,
+  Group<f_Group>, Flags<[]>, Visibility<[CC1Option]>,
+  MarshallingInfoFlag<PreprocessorOpts<"SingleModuleParseMode">>;
 
 def fincremental_extensions :
   Flag<["-"], "fincremental-extensions">,
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h 
b/clang/include/clang/Lex/PreprocessorOptions.h
index c2e3d68333024..83d5a821d07fa 100644
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -152,6 +152,13 @@ class PreprocessorOptions {
   /// that the client can get the maximum amount of information from the 
parser.
   bool SingleFileParseMode = false;
 
+  /// When enabled, preprocessor is in a mode for parsing a single module only.
+  ///
+  /// Disables imports of other modules and if there are any unresolved
+  /// identifiers in preprocessor directive conditions it causes all blocks to
+  /// be skipped so that the client can get a strict subset of the contents.
+  bool SingleModuleParseMode = false;
+
   /// When enabled, the preprocessor will construct editor placeholder tokens.
   bool LexEditorPlaceholders = true;
 
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 8411526019f3e..18c04279184a1 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2073,10 +2073,11 @@ void 
Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
     return;
 
   if (FilenameTok.isNot(tok::header_name)) {
-    if (FilenameTok.is(tok::identifier) && PPOpts.SingleFileParseMode) {
+    if (FilenameTok.is(tok::identifier) &&
+        (PPOpts.SingleFileParseMode || PPOpts.SingleModuleParseMode)) {
       // If we saw #include IDENTIFIER and lexing didn't turn in into a header
-      // name, it was undefined. In 'single-file-parse' mode, just skip the
-      // directive without emitting diagnostics - the identifier might be
+      // name, it was undefined. In 'single-{file,module}-parse' mode, just 
skip
+      // the directive without emitting diagnostics - the identifier might be
       // normally defined in previously-skipped include directive.
       DiscardUntilEndOfDirective();
       return;
@@ -2384,10 +2385,15 @@ Preprocessor::ImportAction 
Preprocessor::HandleHeaderIncludeOrImport(
       (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
       ModuleToImport && !ModuleToImport->isHeaderUnit();
 
+  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule) &&
+      PPOpts.SingleModuleParseMode) {
+    Action = IncludeLimitReached;
+  }
   // Determine whether we should try to import the module for this #include, if
   // there is one. Don't do so if precompiled module support is disabled or we
   // are processing this module textually (because we're building the module).
-  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
+  else if (MaybeTranslateInclude &&
+           (UsableHeaderUnit || UsableClangHeaderModule)) {
     // If this include corresponds to a module but that module is
     // unavailable, diagnose the situation and bail out.
     // FIXME: Remove this; loadModule does the same check (but produces
@@ -3439,6 +3445,13 @@ void Preprocessor::HandleIfdefDirective(Token &Result,
     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                      /*wasskip*/false, /*foundnonskip*/false,
                                      /*foundelse*/false);
+  } else if (PPOpts.SingleModuleParseMode && !MI) {
+    // In 'single-module-parse mode' undefined identifiers trigger skipping of
+    // all the directive blocks. We lie here and set FoundNonSkipPortion so 
that
+    // even any \#else blocks get skipped.
+    SkipExcludedConditionalBlock(
+        HashToken.getLocation(), DirectiveTok.getLocation(),
+        /*FoundNonSkipPortion=*/true, /*FoundElse=*/false);
   } else if (!MI == isIfndef || RetainExcludedCB) {
     // Yes, remember that we are inside a conditional, then lex the next token.
     CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
@@ -3493,6 +3506,13 @@ void Preprocessor::HandleIfDirective(Token &IfToken,
     // the directive blocks.
     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
                                      /*foundnonskip*/false, 
/*foundelse*/false);
+  } else if (PPOpts.SingleModuleParseMode && DER.IncludedUndefinedIds) {
+    // In 'single-module-parse mode' undefined identifiers trigger skipping of
+    // all the directive blocks. We lie here and set FoundNonSkipPortion so 
that
+    // even any \#else blocks get skipped.
+    SkipExcludedConditionalBlock(HashToken.getLocation(), 
IfToken.getLocation(),
+                                 /*FoundNonSkipPortion=*/true,
+                                 /*FoundElse=*/false);
   } else if (ConditionalTrue || RetainExcludedCB) {
     // Yes, remember that we are inside a conditional, then lex the next token.
     CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index a202af774256a..240522ba194d6 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -593,11 +593,13 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, 
unsigned MinPrec,
                                      Token &PeekTok, bool ValueLive,
                                      bool &IncludedUndefinedIds,
                                      Preprocessor &PP) {
-  if (PP.getPreprocessorOpts().SingleFileParseMode && IncludedUndefinedIds) {
-    // The single-file parse mode behavior kicks in as soon as single 
identifier
-    // is undefined. If we've already seen one, there's no point in continuing
-    // with the rest of the expression. Besides saving work, this also prevents
-    // calling undefined function-like macros.
+  if ((PP.getPreprocessorOpts().SingleFileParseMode ||
+       PP.getPreprocessorOpts().SingleModuleParseMode) &&
+      IncludedUndefinedIds) {
+    // The single-{file,module}-parse mode behavior kicks in as soon as single
+    // identifier is undefined. If we've already seen one, there's no point in
+    // continuing with the rest of the expression. Besides saving work, this
+    // also prevents calling undefined function-like macros.
     PP.DiscardUntilEndOfDirective(PeekTok);
     return true;
   }
diff --git a/clang/test/Modules/single-module-parse-mode.c 
b/clang/test/Modules/single-module-parse-mode.c
new file mode 100644
index 0000000000000..947c0e3c1fa8b
--- /dev/null
+++ b/clang/test/Modules/single-module-parse-mode.c
@@ -0,0 +1,92 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache \
+// RUN:   -emit-module %t/module.modulemap -fmodule-name=B -o %t/cache/B.pcm \
+// RUN:   -fmodules-single-module-parse-mode 2>&1 | FileCheck %s
+
+// Modules are not imported.
+// CHECK-NOT: A.h:1:2: error: unreachable
+
+// Headers belonging to this module are included.
+// CHECK:     B2.h:2:2: warning: success
+
+// Non-modular headers are included.
+// CHECK:     T.h:2:2: warning: success
+
+// No branches are entered for #if UNDEFINED.
+// CHECK-NOT: B1.h:6:2: error: unreachable
+// CHECK-NOT: B1.h:8:2: error: unreachable
+// CHECK-NOT: B1.h:10:2: error: unreachable
+
+// No branches are entered for #ifdef UNDEFINED.
+// CHECK-NOT: B1.h:14:2: error: unreachable
+// CHECK-NOT: B1.h:16:2: error: unreachable
+
+// No branches are entered for #ifndef UNDEFINED.
+// CHECK-NOT: B1.h:20:2: error: unreachable
+// CHECK-NOT: B1.h:22:2: error: unreachable
+
+// No error messages are emitted for UNDEFINED_FUNCTION_LIKE().
+// CHECK-NOT: B1.h:25:2: error: unreachable
+
+// The correct branch is entered for #if DEFINED.
+// CHECK:     B1.h:32:3: warning: success
+// CHECK-NOT: B1.h:34:3: error: unreachable
+// CHECK-NOT: B1.h:36:3: error: unreachable
+
+// Headers belonging to this module are included.
+// CHECK:     B2.h:2:2: warning: success
+
+//--- module.modulemap
+module A { header "A.h" }
+module B {
+  header "B1.h"
+  header "B2.h"
+}
+//--- A.h
+#error unreachable
+//--- B1.h
+#include "A.h"
+#include "B2.h"
+#include "T.h"
+
+#if UNDEFINED
+# error unreachable
+#elif UNDEFINED2
+# error unreachable
+#else
+# error unreachable
+#endif
+
+#ifdef UNDEFINED
+# error unreachable
+#else
+# error unreachable
+#endif
+
+#ifndef UNDEFINED
+# error unreachable
+#else
+# error unreachable
+#endif
+
+#if UNDEFINED_FUNCTION_LIKE()
+#endif
+
+#define DEFINED_1 1
+#define DEFINED_2 1
+
+#if DEFINED_1
+# warning success
+#elif DEFINED_2
+# error unreachable
+#else
+# error unreachable
+#endif
+//--- B2.h
+// Headers belonging to this module are included.
+#warning success
+//--- T.h
+// Non-modular headers are included.
+#warning success

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to