This revision was automatically updated to reflect the committed changes.
Closed by commit rGaf2d11b1d5c1: [C++20][Modules] Implement include 
translation. (authored by iains).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128981/new/

https://reviews.llvm.org/D128981

Files:
  clang/include/clang/Lex/Preprocessor.h
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Lex/Preprocessor.cpp
  clang/lib/Parse/Parser.cpp
  clang/test/Modules/cxx20-include-translation.cpp

Index: clang/test/Modules/cxx20-include-translation.cpp
===================================================================
--- /dev/null
+++ clang/test/Modules/cxx20-include-translation.cpp
@@ -0,0 +1,109 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h1.h -emit-header-unit -o h1.pcm
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h2.h -emit-header-unit -o h2.pcm
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h3.h -emit-header-unit -o h3.pcm
+// RUN: %clang_cc1 -std=c++20 -xc++-user-header h4.h -emit-header-unit -o h4.pcm
+
+// RUN: %clang_cc1 -std=c++20 Xlate.cpp -emit-module-interface -o Xlate.pcm \
+// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \
+// RUN: -fmodule-file=h4.pcm -fsyntax-only -Wauto-import -verify
+
+// Check that we do the intended translation and not more.
+// RUN: %clang_cc1 -std=c++20 Xlate.cpp \
+// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \
+// RUN: -fmodule-file=h4.pcm  -E -undef | FileCheck %s
+
+// We expect no diagnostics here, the used functions should all be available.
+// RUN: %clang_cc1 -std=c++20 Xlate.cpp -emit-module-interface \
+// RUN: -fmodule-file=h1.pcm -fmodule-file=h2.pcm -fmodule-file=h3.pcm \
+// RUN: -fmodule-file=h4.pcm -fsyntax-only
+
+// The content of the headers is not terribly important, we just want to check
+// whether they are textually included or include-translated.
+//--- h1.h
+#ifndef H1_GUARD
+#define H1_GUARD
+
+#define ONE 1
+
+void foo();
+
+#endif // H1_GUARD
+
+//--- h2.h
+#ifndef H2_GUARD
+#define H2_GUARD
+
+#define TWO 2
+
+void bar();
+
+#endif // H2_GUARD
+
+//--- h3.h
+#ifndef H3_GUARD
+#define H3_GUARD
+
+#define THREE 3
+
+void baz();
+
+#endif // H3_GUARD
+
+//--- h4.h
+#ifndef H4_GUARD
+#define H4_GUARD
+
+#define FOUR 4
+
+void boo();
+
+#endif // H4_GUARD
+
+//--- h5.h
+#ifndef H5_GUARD
+#define H5_GUARD
+
+#define FIVE 5
+
+void five();
+
+#endif // H4_GUARD
+
+//--- Xlate.cpp
+/* some comment ...
+  ... */
+module /*nothing here*/;
+
+// This should be include-translated, when the header unit for h1 is available.
+#include "h1.h" // expected-warning {{treating #include as an import of module './h1.h'}}
+// Import of a header unit is allowed, named modules are not.
+import "h2.h";
+// A regular, untranslated, header
+#include "h5.h"
+
+export module Xlate;
+
+// This is OK, the import immediately follows the module decl.
+import "h3.h";
+
+// This should *not* be include-translated, even if header unit for h4 is
+// available.
+#include "h4.h"
+
+export void charlie() {
+  foo();
+  bar();
+  baz();
+  boo();
+  five();
+}
+
+// CHECK: #pragma clang module import "./h1.h"
+// CHECK: import ./h2.h
+// CHECK: import ./h3.h
+// CHECK-NOT: #pragma clang module import "./h4.h"
Index: clang/lib/Parse/Parser.cpp
===================================================================
--- clang/lib/Parse/Parser.cpp
+++ clang/lib/Parse/Parser.cpp
@@ -663,12 +663,22 @@
     return false;
   }
 
-  case tok::annot_module_include:
-    Actions.ActOnModuleInclude(Tok.getLocation(),
-                               reinterpret_cast<Module *>(
-                                   Tok.getAnnotationValue()));
+  case tok::annot_module_include: {
+    auto Loc = Tok.getLocation();
+    Module *Mod = reinterpret_cast<Module *>(Tok.getAnnotationValue());
+    // FIXME: We need a better way to disambiguate C++ clang modules and
+    // standard C++ modules.
+    if (!getLangOpts().CPlusPlusModules || !Mod->isHeaderUnit())
+      Actions.ActOnModuleInclude(Loc, Mod);
+    else {
+      DeclResult Import =
+          Actions.ActOnModuleImport(Loc, SourceLocation(), Loc, Mod);
+      Decl *ImportDecl = Import.isInvalid() ? nullptr : Import.get();
+      Result = Actions.ConvertDeclToDeclGroup(ImportDecl);
+    }
     ConsumeAnnotationToken();
     return false;
+  }
 
   case tok::annot_module_begin:
     Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(
Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -941,6 +941,9 @@
 
   // Update ImportSeqState to track our position within a C++20 import-seq
   // if this token is being produced as a result of phase 4 of translation.
+  // Update TrackGMFState to decide if we are currently in a Global Module
+  // Fragment. GMF state updates should precede ImportSeq ones, since GMF state
+  // depends on the prevailing ImportSeq state in two cases.
   if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
       !Result.getFlag(Token::IsReinjected)) {
     switch (Result.getKind()) {
@@ -953,7 +956,11 @@
     case tok::r_brace:
       ImportSeqState.handleCloseBrace();
       break;
+    // This token is injected to represent the translation of '#include "a.h"'
+    // into "import a.h;". Mimic the notional ';'.
+    case tok::annot_module_include:
     case tok::semi:
+      TrackGMFState.handleSemi();
       ImportSeqState.handleSemi();
       break;
     case tok::header_name:
@@ -961,10 +968,12 @@
       ImportSeqState.handleHeaderName();
       break;
     case tok::kw_export:
+      TrackGMFState.handleExport();
       ImportSeqState.handleExport();
       break;
     case tok::identifier:
       if (Result.getIdentifierInfo()->isModulesImport()) {
+        TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq());
         ImportSeqState.handleImport();
         if (ImportSeqState.afterImportSeq()) {
           ModuleImportLoc = Result.getLocation();
@@ -973,9 +982,13 @@
           CurLexerKind = CLK_LexAfterModuleImport;
         }
         break;
+      } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
+        TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq());
+        break;
       }
       LLVM_FALLTHROUGH;
     default:
+      TrackGMFState.handleMisc();
       ImportSeqState.handleMisc();
       break;
     }
@@ -1222,6 +1235,7 @@
       LLVM_FALLTHROUGH;
 
     case ImportAction::ModuleImport:
+    case ImportAction::HeaderUnitImport:
     case ImportAction::SkippedModuleImport:
       // We chose to import (or textually enter) the file. Convert the
       // header-name token into a header unit annotation token.
Index: clang/lib/Lex/PPDirectives.cpp
===================================================================
--- clang/lib/Lex/PPDirectives.cpp
+++ clang/lib/Lex/PPDirectives.cpp
@@ -1983,6 +1983,10 @@
     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                          tok::annot_module_begin, Action.ModuleForHeader);
     break;
+  case ImportAction::HeaderUnitImport:
+    EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
+                         Action.ModuleForHeader);
+    break;
   case ImportAction::ModuleImport:
     EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
                          tok::annot_module_include, Action.ModuleForHeader);
@@ -2191,6 +2195,17 @@
   // known to have no effect beyond its effect on module visibility -- that is,
   // if it's got an include guard that is already defined, set to Import if it
   // is a modular header we've already built and should import.
+
+  // For C++20 Modules
+  // [cpp.include]/7 If the header identified by the header-name denotes an
+  // importable header, it is implementation-defined whether the #include
+  // preprocessing directive is instead replaced by an import directive.
+  // For this implementation, the translation is permitted when we are parsing
+  // the Global Module Fragment, and not otherwise (the cases where it would be
+  // valid to replace an include with an import are highly constrained once in
+  // named module purview; this choice avoids considerable complexity in
+  // determining valid cases).
+
   enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
 
   if (PPOpts->SingleFileParseMode)
@@ -2203,13 +2218,34 @@
       alreadyIncluded(*File))
     Action = IncludeLimitReached;
 
+  bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule &&
+                               !isForModuleBuilding(SuggestedModule.getModule(),
+                                                    getLangOpts().CurrentModule,
+                                                    getLangOpts().ModuleName);
+
+  // FIXME: We do not have a good way to disambiguate C++ clang modules from
+  // C++ standard modules (other than use/non-use of Header Units).
+  Module *SM = SuggestedModule.getModule();
+  // Maybe a usable Header Unit
+  bool UsableHeaderUnit = false;
+  if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
+    if (TrackGMFState.inGMF() || IsImportDecl)
+      UsableHeaderUnit = true;
+    else if (!IsImportDecl) {
+      // This is a Header Unit that we do not include-translate
+      SuggestedModule = ModuleMap::KnownHeader();
+      SM = nullptr;
+    }
+  }
+  // Maybe a usable clang header module.
+  bool UsableHeaderModule =
+      (getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
+      !SM->isHeaderUnit();
+
   // Determine whether we should try to import the module for this #include, if
   // there is one. Don't do so if precompiled module support is disabled or we
   // are processing this module textually (because we're building the module).
-  if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
-      !isForModuleBuilding(SuggestedModule.getModule(),
-                           getLangOpts().CurrentModule,
-                           getLangOpts().ModuleName)) {
+  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) {
     // If this include corresponds to a module but that module is
     // unavailable, diagnose the situation and bail out.
     // FIXME: Remove this; loadModule does the same check (but produces
@@ -2226,7 +2262,7 @@
     // FIXME: Should we have a second loadModule() overload to avoid this
     // extra lookup step?
     SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
-    for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
+    for (Module *Mod = SM; Mod; Mod = Mod->Parent)
       Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
                                     FilenameTok.getLocation()));
     std::reverse(Path.begin(), Path.end());
@@ -2293,9 +2329,12 @@
   // Ask HeaderInfo if we should enter this #include file.  If not, #including
   // this file will have no effect.
   if (Action == Enter && File &&
-      !HeaderInfo.ShouldEnterIncludeFile(
-          *this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules,
-          SuggestedModule.getModule(), IsFirstIncludeOfFile)) {
+      !HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
+                                         EnterOnce, getLangOpts().Modules, SM,
+                                         IsFirstIncludeOfFile)) {
+    // C++ standard modules:
+    // If we are not in the GMF, then we textually include only
+    // clang modules:
     // Even if we've already preprocessed this header once and know that we
     // don't need to see its contents again, we still need to import it if it's
     // modular because we might not have imported it from this submodule before.
@@ -2303,7 +2342,10 @@
     // FIXME: We don't do this when compiling a PCH because the AST
     // serialization layer can't cope with it. This means we get local
     // submodule visibility semantics wrong in that case.
-    Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
+    if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
+      Action = TrackGMFState.inGMF() ? Import : Skip;
+    else
+      Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
   }
 
   // Check for circular inclusion of the main file.
@@ -2440,8 +2482,8 @@
   switch (Action) {
   case Skip:
     // If we don't need to enter the file, stop now.
-    if (Module *M = SuggestedModule.getModule())
-      return {ImportAction::SkippedModuleImport, M};
+    if (SM)
+      return {ImportAction::SkippedModuleImport, SM};
     return {ImportAction::None};
 
   case IncludeLimitReached:
@@ -2451,16 +2493,15 @@
 
   case Import: {
     // If this is a module import, make it visible if needed.
-    Module *M = SuggestedModule.getModule();
-    assert(M && "no module to import");
+    assert(SM && "no module to import");
 
-    makeModuleVisible(M, EndLoc);
+    makeModuleVisible(SM, EndLoc);
 
     if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
         tok::pp___include_macros)
       return {ImportAction::None};
 
-    return {ImportAction::ModuleImport, M};
+    return {ImportAction::ModuleImport, SM};
   }
 
   case Enter:
@@ -2492,13 +2533,14 @@
     return {ImportAction::None};
 
   // Determine if we're switching to building a new submodule, and which one.
-  if (auto *M = SuggestedModule.getModule()) {
-    if (M->getTopLevelModule()->ShadowingModule) {
+  // This does not apply for C++20 modules header units.
+  if (SM && !SM->isHeaderUnit()) {
+    if (SM->getTopLevelModule()->ShadowingModule) {
       // We are building a submodule that belongs to a shadowed module. This
       // means we find header files in the shadowed module.
-      Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
-        << M->getFullModuleName();
-      Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
+      Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
+          << SM->getFullModuleName();
+      Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
            diag::note_previous_definition);
       return {ImportAction::None};
     }
@@ -2511,22 +2553,22 @@
     // that PCH, which means we should enter the submodule. We need to teach
     // the AST serialization layer to deal with the resulting AST.
     if (getLangOpts().CompilingPCH &&
-        isForModuleBuilding(M, getLangOpts().CurrentModule,
+        isForModuleBuilding(SM, getLangOpts().CurrentModule,
                             getLangOpts().ModuleName))
       return {ImportAction::None};
 
     assert(!CurLexerSubmodule && "should not have marked this as a module yet");
-    CurLexerSubmodule = M;
+    CurLexerSubmodule = SM;
 
     // Let the macro handling code know that any future macros are within
     // the new submodule.
-    EnterSubmodule(M, EndLoc, /*ForPragma*/false);
+    EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);
 
     // Let the parser know that any future declarations are within the new
     // submodule.
     // FIXME: There's no point doing this if we're handling a #__include_macros
     // directive.
-    return {ImportAction::ModuleBegin, M};
+    return {ImportAction::ModuleBegin, SM};
   }
 
   assert(!IsImportDecl && "failed to diagnose missing module for import decl");
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -385,6 +385,7 @@
 
     bool atTopLevel() { return S <= 0; }
     bool afterImportSeq() { return S == AfterImportSeq; }
+    bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
 
   private:
     State S;
@@ -397,6 +398,67 @@
   /// Our current position within a C++20 import-seq.
   ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
 
+  /// Track whether we are in a Global Module Fragment
+  class TrackGMF {
+  public:
+    enum GMFState : int {
+      GMFActive = 1,
+      MaybeGMF = 0,
+      BeforeGMFIntroducer = -1,
+      GMFAbsentOrEnded = -2,
+    };
+
+    TrackGMF(GMFState S) : S(S) {}
+
+    /// Saw a semicolon.
+    void handleSemi() {
+      // If it is immediately after the first instance of the module keyword,
+      // then that introduces the GMF.
+      if (S == MaybeGMF)
+        S = GMFActive;
+    }
+
+    /// Saw an 'export' identifier.
+    void handleExport() {
+      // The presence of an 'export' keyword always ends or excludes a GMF.
+      S = GMFAbsentOrEnded;
+    }
+
+    /// Saw an 'import' identifier.
+    void handleImport(bool AfterTopLevelTokenSeq) {
+      // If we see this before any 'module' kw, then we have no GMF.
+      if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
+        S = GMFAbsentOrEnded;
+    }
+
+    /// Saw a 'module' identifier.
+    void handleModule(bool AfterTopLevelTokenSeq) {
+      // This was the first module identifier and not preceded by any token
+      // that would exclude a GMF.  It could begin a GMF, but only if directly
+      // followed by a semicolon.
+      if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
+        S = MaybeGMF;
+      else
+        S = GMFAbsentOrEnded;
+    }
+
+    /// Saw any other token.
+    void handleMisc() {
+      // We saw something other than ; after the 'module' kw, so not a GMF.
+      if (S == MaybeGMF)
+        S = GMFAbsentOrEnded;
+    }
+
+    bool inGMF() { return S == GMFActive; }
+
+  private:
+    /// Track the transitions into and out of a Global Module Fragment,
+    /// if one is present.
+    GMFState S;
+  };
+
+  TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
+
   /// Whether the module import expects an identifier next. Otherwise,
   /// it expects a '.' or ';'.
   bool ModuleImportExpectsIdentifier = false;
@@ -2414,6 +2476,7 @@
       None,
       ModuleBegin,
       ModuleImport,
+      HeaderUnitImport,
       SkippedModuleImport,
       Failure,
     } Kind;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to