jansvoboda11 created this revision.
jansvoboda11 added reviewers: benlangmuir, vsapsai, Bigcheese.
Herald added subscribers: ributzka, mgrang.
Herald added a project: All.
jansvoboda11 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

When building a module consisting of submodules, the preprocessor maintains a 
global set of included headers. This information gets serialized into the PCM 
file (specifically into the HeaderFileInfo table). After loading such PCM file, 
this information is deserialized into the state of the importing preprocessor. 
This happens even if the headers were included by (sub)modules that are not 
visible. This can incorrectly prevent imports of textual headers in the 
importing instance (see attached tests).

This patch fixes this bug splitting the set of included files per submodule. 
This is an alternative to D112915 <https://reviews.llvm.org/D112915> and 
D104344 <https://reviews.llvm.org/D104344>.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D155503

Files:
  clang/include/clang/Lex/Preprocessor.h
  clang/lib/Lex/HeaderSearch.cpp
  clang/lib/Lex/Preprocessor.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/lib/Serialization/ASTReaderInternals.h
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/Modules/import-submodule-visibility.c

Index: clang/test/Modules/import-submodule-visibility.c
===================================================================
--- /dev/null
+++ clang/test/Modules/import-submodule-visibility.c
@@ -0,0 +1,64 @@
+// This test checks that imports of headers that appeared in a different submodule than
+// what is imported by the current TU don't affect the compilation.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- C/C.h
+#include "Textual.h"
+//--- C/module.modulemap
+module C { header "C.h" }
+
+//--- D/D1.h
+#include "Textual.h"
+//--- D/D2.h
+//--- D/module.modulemap
+module D {
+  module D1 { header "D1.h" }
+  module D2 { header "D2.h" }
+}
+
+//--- E/E1.h
+#include "E2.h"
+//--- E/E2.h
+#include "Textual.h"
+//--- E/module.modulemap
+module E {
+  module E1 { header "E1.h" }
+  module E2 { header "E2.h" }
+}
+
+//--- Textual.h
+#define MACRO_TEXTUAL 1
+
+//--- test_top.c
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+//--- test_sub.c
+#import "D/D2.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+//--- test_transitive.c
+#import "E/E1.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+// Simply loading a PCM file containing top-level module including a header does
+// not prevent inclusion of that header in the TU.
+//
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/C/module.modulemap -fmodule-name=C -o %t/C.pcm
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_top.c -fmodule-file=%t/C.pcm
+
+// Loading a PCM file and importing its empty submodule does not prevent
+// inclusion of headers included by invisible sibling submodules.
+//
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/D/module.modulemap -fmodule-name=D -o %t/D.pcm
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_sub.c -fmodule-file=%t/D.pcm
+
+// Loading a PCM file and importing a submodule does not prevent inclusion of
+// headers included by some of its transitive un-exported dependencies.
+//
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/E/module.modulemap -fmodule-name=E -o %t/E.pcm
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_transitive.c -fmodule-file=%t/E.pcm
Index: clang/lib/Serialization/ASTWriter.cpp
===================================================================
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -1762,7 +1762,7 @@
 
     struct data_type {
       const HeaderFileInfo &HFI;
-      bool AlreadyIncluded;
+      SmallVector<uint32_t, 4> IncludedIn;
       ArrayRef<ModuleMap::KnownHeader> KnownHeaders;
       UnresolvedModule Unresolved;
     };
@@ -1787,6 +1787,7 @@
           DataLen += 4;
       if (Data.Unresolved.getPointer())
         DataLen += 4;
+      DataLen += 4 + 4 * Data.IncludedIn.size();
       return emitULEBKeyDataLength(KeyLen, DataLen, Out);
     }
 
@@ -1808,8 +1809,7 @@
       endian::Writer LE(Out, little);
       uint64_t Start = Out.tell(); (void)Start;
 
-      unsigned char Flags = (Data.AlreadyIncluded << 6)
-                          | (Data.HFI.isImport << 5)
+      unsigned char Flags = (Data.HFI.isImport << 5)
                           | (Data.HFI.isPragmaOnce << 4)
                           | (Data.HFI.DirInfo << 1)
                           | Data.HFI.IndexHeaderMapHeader;
@@ -1820,6 +1820,10 @@
       else
         LE.write<uint32_t>(Writer.getIdentifierRef(Data.HFI.ControllingMacro));
 
+      LE.write<uint32_t>(Data.IncludedIn.size());
+      for (uint32_t ModID : Data.IncludedIn)
+        LE.write<uint32_t>(ModID);
+
       unsigned Offset = 0;
       if (!Data.HFI.Framework.empty()) {
         // If this header refers into a framework, save the framework name.
@@ -1910,7 +1914,7 @@
         HeaderFileInfoTrait::key_type Key = {
             FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
         HeaderFileInfoTrait::data_type Data = {
-            Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+            Empty, {}, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
         // FIXME: Deal with cases where there are multiple unresolved header
         // directives in different submodules for the same header.
         Generator.insert(Key, Data, GeneratorTrait);
@@ -1953,13 +1957,19 @@
       SavedStrings.push_back(Filename.data());
     }
 
-    bool Included = PP->alreadyIncluded(File);
+    SmallVector<uint32_t, 4> IncludedIn;
+    PP->forModulesIncluding(File, [&](Module *M) {
+      if ((!M && !WritingModule) ||
+          (M && M->getTopLevelModule() == WritingModule))
+        IncludedIn.push_back(getSubmoduleID(M));
+    });
+    llvm::sort(IncludedIn);
 
     HeaderFileInfoTrait::key_type Key = {
       Filename, File->getSize(), getTimestampForOutput(File)
     };
     HeaderFileInfoTrait::data_type Data = {
-      *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(File), {}
+      *HFI, IncludedIn, HS.getModuleMap().findResolvedModulesForHeader(File), {}
     };
     Generator.insert(Key, Data, GeneratorTrait);
     ++NumHeaderSearchEntries;
Index: clang/lib/Serialization/ASTReaderInternals.h
===================================================================
--- clang/lib/Serialization/ASTReaderInternals.h
+++ clang/lib/Serialization/ASTReaderInternals.h
@@ -15,6 +15,7 @@
 
 #include "MultiOnDiskHashTable.h"
 #include "clang/AST/DeclarationName.h"
+#include "clang/Basic/FileEntry.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Serialization/ASTBitCodes.h"
 #include "llvm/ADT/DenseSet.h"
@@ -27,7 +28,6 @@
 namespace clang {
 
 class ASTReader;
-class FileEntry;
 struct HeaderFileInfo;
 class HeaderSearch;
 class ObjCMethodDecl;
@@ -278,7 +278,7 @@
   data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
 
 private:
-  const FileEntry *getFile(const internal_key_type &Key);
+  OptionalFileEntryRef getFile(const internal_key_type &Key);
 };
 
 /// The on-disk hash table used for known header files.
Index: clang/lib/Serialization/ASTReader.cpp
===================================================================
--- clang/lib/Serialization/ASTReader.cpp
+++ clang/lib/Serialization/ASTReader.cpp
@@ -1875,19 +1875,15 @@
   return LocalID + I->second;
 }
 
-const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
+OptionalFileEntryRef
+HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
   FileManager &FileMgr = Reader.getFileManager();
-  if (!Key.Imported) {
-    if (auto File = FileMgr.getFile(Key.Filename))
-      return *File;
-    return nullptr;
-  }
+  if (!Key.Imported)
+    return FileMgr.getOptionalFileRef(Key.Filename);
 
   std::string Resolved = std::string(Key.Filename);
   Reader.ResolveImportedPath(M, Resolved);
-  if (auto File = FileMgr.getFile(Resolved))
-    return *File;
-  return nullptr;
+  return FileMgr.getOptionalFileRef(Resolved);
 }
 
 unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
@@ -1910,8 +1906,8 @@
     return true;
 
   // Determine whether the actual files are equivalent.
-  const FileEntry *FEA = getFile(a);
-  const FileEntry *FEB = getFile(b);
+  OptionalFileEntryRef FEA = getFile(a);
+  OptionalFileEntryRef FEB = getFile(b);
   return FEA && FEA == FEB;
 }
 
@@ -1937,17 +1933,15 @@
                               unsigned DataLen) {
   using namespace llvm::support;
 
+  OptionalFileEntryRef FE = getFile(key);
+  // We can only get here if the key of this entry is an absolute path that
+  // matches the name of an already known FileEntry, or the FileEntry given out
+  // by FileManager for this key compares equal to the alrleady known FileEntry.
+  assert(FE && "Missing FileEntry during HeaderFileInfo deserialization");
+
   const unsigned char *End = d + DataLen;
   HeaderFileInfo HFI;
   unsigned Flags = *d++;
-
-  bool Included = (Flags >> 6) & 0x01;
-  if (Included)
-    if (const FileEntry *FE = getFile(key))
-      // Not using \c Preprocessor::markIncluded(), since that would attempt to
-      // deserialize this header file info again.
-      Reader.getPreprocessor().getIncludedFiles().insert(FE);
-
   // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
   HFI.isImport |= (Flags >> 5) & 0x01;
   HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
@@ -1955,6 +1949,18 @@
   HFI.IndexHeaderMapHeader = Flags & 0x01;
   HFI.ControllingMacroID = Reader.getGlobalIdentifierID(
       M, endian::readNext<uint32_t, little, unaligned>(d));
+
+  Preprocessor &PP = Reader.getPreprocessor();
+  auto IncludedInCount = endian::readNext<uint32_t, little, unaligned>(d);
+  for (uint32_t I = 0; I < IncludedInCount; ++I) {
+    uint32_t ModID = endian::readNext<uint32_t, little, unaligned>(d);
+    Module *Mod = Reader.getModule(ModID);
+    if (Mod)
+      PP.getFilesIncludedIn(Mod).insert(*FE);
+    if (!Mod || PP.isModuleCurrentlyVisible(Mod))
+      PP.getIncludedFiles().insert(*FE);
+  }
+
   if (unsigned FrameworkOffset =
           endian::readNext<uint32_t, little, unaligned>(d)) {
     // The framework offset is 1 greater than the actual offset,
@@ -1974,18 +1980,11 @@
     // implicit module import.
     SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
     Module *Mod = Reader.getSubmodule(GlobalSMID);
-    FileManager &FileMgr = Reader.getFileManager();
-    ModuleMap &ModMap =
-        Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();
-
-    std::string Filename = std::string(key.Filename);
-    if (key.Imported)
-      Reader.ResolveImportedPath(M, Filename);
-    if (auto FE = FileMgr.getOptionalFileRef(Filename)) {
-      // FIXME: NameAsWritten
-      Module::Header H = {std::string(key.Filename), "", *FE};
-      ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
-    }
+    ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+
+    // FIXME: NameAsWritten
+    Module::Header H = {std::string(key.Filename), "", *FE};
+    ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
     HFI.isModuleHeader |= ModuleMap::isModular(HeaderRole);
   }
 
Index: clang/lib/Lex/Preprocessor.cpp
===================================================================
--- clang/lib/Lex/Preprocessor.cpp
+++ clang/lib/Lex/Preprocessor.cpp
@@ -559,7 +559,7 @@
     // Tell the header info that the main file was entered.  If the file is later
     // #imported, it won't be re-entered.
     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
-      markIncluded(FE);
+      markDirectlyIncluded(FE);
   }
 
   // Preprocess Predefines to populate the initial preprocessor state.
@@ -1340,7 +1340,14 @@
 
 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
   CurSubmoduleState->VisibleModules.setVisible(
-      M, Loc, [](Module *) {},
+      M, Loc,
+      [&](Module *M) {
+        // The set of included files may be incomplete for modules coming from
+        // a PCM, since we're deserializing included files lazily. The rest of
+        // the files will be handled lazily, as we need them.
+        for (const FileEntry *FE : getFilesIncludedIn(M))
+          markTransitivelyIncluded(FE);
+      },
       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
         // FIXME: Include the path in the diagnostic.
         // FIXME: Include the import location for the conflicting module.
Index: clang/lib/Lex/HeaderSearch.cpp
===================================================================
--- clang/lib/Lex/HeaderSearch.cpp
+++ clang/lib/Lex/HeaderSearch.cpp
@@ -1482,7 +1482,7 @@
     }
   }
 
-  IsFirstIncludeOfFile = PP.markIncluded(File);
+  IsFirstIncludeOfFile = PP.markDirectlyIncluded(File);
 
   return true;
 }
Index: clang/include/clang/Lex/Preprocessor.h
===================================================================
--- clang/include/clang/Lex/Preprocessor.h
+++ clang/include/clang/Lex/Preprocessor.h
@@ -991,6 +991,10 @@
   /// in a submodule.
   SubmoduleState *CurSubmoduleState;
 
+  /// The set of files that have been included in each submodule.
+  /// Files included outside of any module (e.g. in PCH) have nullptr key.
+  llvm::DenseMap<Module *, IncludedFilesSet> IncludedFilesPerSubmodule;
+
   /// The files that have been included.
   IncludedFilesSet IncludedFiles;
 
@@ -1477,10 +1481,25 @@
     return AffectingClangModules;
   }
 
-  /// Mark the file as included.
+  /// Mark the file as included in the current state.
+  /// Do not attribute it to the current module - this file was included by one
+  /// of its dependencies (transitively).
+  void markTransitivelyIncluded(const FileEntry *File) {
+    HeaderInfo.getFileInfo(File);
+    IncludedFiles.insert(File);
+  }
+
+  /// Mark the file as included in the current state and attribute it to the
+  /// current module.
   /// Returns true if this is the first time the file was included.
-  bool markIncluded(const FileEntry *File) {
+  bool markDirectlyIncluded(const FileEntry *File) {
     HeaderInfo.getFileInfo(File);
+
+    Module *M = !BuildingSubmoduleStack.empty()
+                    ? BuildingSubmoduleStack.back().M
+                    : getCurrentModule();
+    IncludedFilesPerSubmodule[M].insert(File);
+
     return IncludedFiles.insert(File).second;
   }
 
@@ -1490,6 +1509,20 @@
     return IncludedFiles.count(File);
   }
 
+  /// Invoke the callback for every module known to include the given file.
+  void forModulesIncluding(const FileEntry *FE,
+                           llvm::function_ref<void(Module *)> Cb) {
+    // TODO: Maintain a data-structure that provides this more efficiently.
+    for (const auto &Entry : IncludedFilesPerSubmodule)
+      if (Entry.second.contains(FE))
+        Cb(Entry.first);
+  }
+
+  /// Get the set of files included in the given module.
+  IncludedFilesSet &getFilesIncludedIn(Module *M) {
+    return IncludedFilesPerSubmodule[M];
+  }
+
   /// Get the set of included files.
   IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
   const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
@@ -1730,6 +1763,11 @@
     return CurSubmoduleState->VisibleModules.getImportLoc(M);
   }
 
+  /// Determine whether the given module is currently visible.
+  bool isModuleCurrentlyVisible(Module *M) const {
+    return CurSubmoduleState->VisibleModules.isVisible(M);
+  }
+
   /// Lex a string literal, which may be the concatenation of multiple
   /// string literals and may even come from macro expansion.
   /// \returns true on success, false if a error diagnostic has been generated.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D155503: [clang][modul... Jan Svoboda via Phabricator via cfe-commits

Reply via email to