sammccall created this revision.
sammccall added a reviewer: ioeric.
Herald added subscribers: cfe-commits, kadircet, arphaman, jkorous, MaskRay, 
ilya-biryukov.
Herald added a project: clang.

Typically used with umbrella headers, e.g. GTK:

#if !defined (__GTK_H_INSIDE__) && !defined (GTK_COMPILATION)
 #error "Only <gtk/gtk.h> can be included directly."
 #endif

Heuristic is fairly conservative, a quick code search over github showed
a fair number of hits and few/no false positives. (Not all were umbrella
headers, but I'd be happy avoiding include insertion for all of them).

We may want to relax the heuristic later to catch more cases.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D60815

Files:
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h
  unittests/clangd/SymbolCollectorTests.cpp

Index: unittests/clangd/SymbolCollectorTests.cpp
===================================================================
--- unittests/clangd/SymbolCollectorTests.cpp
+++ unittests/clangd/SymbolCollectorTests.cpp
@@ -1064,8 +1064,19 @@
   auto TU = TestTU::withHeaderCode("int x();");
   EXPECT_THAT(TU.headerSymbols(), ElementsAre(IncludeHeader()));
 
+  // Files missing include guards aren't eligible for insertion.
   TU.ImplicitHeaderGuard = false;
   EXPECT_THAT(TU.headerSymbols(), ElementsAre(Not(IncludeHeader())));
+
+  // We recognize some patterns of trying to prevent insertion.
+  TU = TestTU::withHeaderCode(R"cpp(
+#ifndef SECRET
+#error "This file isn't safe to include directly"
+#endif
+    int x();
+    )cpp");
+  TU.ExtraArgs.push_back("-DSECRET"); // *we're* able to include it.
+  EXPECT_THAT(TU.headerSymbols(), ElementsAre(Not(IncludeHeader())));
 }
 
 TEST_F(SymbolCollectorTest, AvoidUsingFwdDeclsAsCanonicalDecls) {
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -19,6 +19,7 @@
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Regex.h"
 #include <functional>
 
 namespace clang {
@@ -117,6 +118,15 @@
                                bool IsMainFileSymbol);
   void addDefinition(const NamedDecl &, const Symbol &DeclSymbol);
 
+  llvm::Optional<std::string> getIncludeHeader(llvm::StringRef QName, FileID);
+  bool isSelfContainedHeader(FileID);
+  // Heuristic to detect headers that aren't self-contained, usually because
+  // they need to be included via an umbrella header. e.g. GTK matches this.
+  llvm::Regex DontIncludeMe = {
+      "^[ \t]*#[ \t]*if.*\n"         // An #if, #ifndef etc directive, then
+      "[ \t]*#[ \t]*error.*include", // an #error directive mentioning "include"
+      llvm::Regex::Newline};
+
   // All Symbols collected from the AST.
   SymbolSlab::Builder Symbols;
   // All refs collected from the AST.
@@ -141,6 +151,7 @@
   llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls;
   // Cache whether to index a file or not.
   llvm::DenseMap<FileID, bool> FilesToIndexCache;
+  llvm::DenseMap<FileID, bool> HeaderIsSelfContainedCache;
 };
 
 } // namespace clangd
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -128,48 +128,6 @@
   }
 }
 
-bool isSelfContainedHeader(FileID FID, const SourceManager &SM,
-                           const Preprocessor &PP) {
-  const FileEntry *FE = SM.getFileEntryForID(FID);
-  if (!FE)
-    return false;
-  return PP.getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE);
-}
-
-/// Gets a canonical include (URI of the header or <header> or "header") for
-/// header of \p FID (which should usually be the *expansion* file).
-/// Returns None if includes should not be inserted for this file.
-llvm::Optional<std::string>
-getIncludeHeader(llvm::StringRef QName, const SourceManager &SM,
-                 const Preprocessor &PP, FileID FID,
-                 const SymbolCollector::Options &Opts) {
-  const FileEntry *FE = SM.getFileEntryForID(FID);
-  if (!FE || FE->getName().empty())
-    return llvm::None;
-  llvm::StringRef Filename = FE->getName();
-  // If a file is mapped by canonical headers, use that mapping, regardless
-  // of whether it's an otherwise-good header (header guards etc).
-  if (Opts.Includes) {
-    llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
-    // If we had a mapping, always use it.
-    if (Canonical.startswith("<") || Canonical.startswith("\""))
-      return Canonical.str();
-    if (Canonical != Filename)
-      return toURI(SM, Canonical, Opts);
-  }
-  if (!isSelfContainedHeader(FID, SM, PP)) {
-    // A .inc or .def file is often included into a real header to define
-    // symbols (e.g. LLVM tablegen files).
-    if (Filename.endswith(".inc") || Filename.endswith(".def"))
-      return getIncludeHeader(QName, SM, PP,
-                              SM.getFileID(SM.getIncludeLoc(FID)), Opts);
-    // Conservatively refuse to insert #includes to files without guards.
-    return llvm::None;
-  }
-  // Standard case: just insert the file itself.
-  return toURI(SM, Filename, Opts);
-}
-
 // Return the symbol range of the token at \p TokLoc.
 std::pair<SymbolLocation::Position, SymbolLocation::Position>
 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
@@ -452,9 +410,8 @@
 
   std::string Include;
   if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) {
-    if (auto Header =
-            getIncludeHeader(Name->getName(), SM, *PP,
-                             SM.getDecomposedExpansionLoc(DefLoc).first, Opts))
+    if (auto Header = getIncludeHeader(
+            Name->getName(), SM.getDecomposedExpansionLoc(DefLoc).first))
       Include = std::move(*Header);
   }
   S.Signature = Signature;
@@ -533,6 +490,7 @@
   ReferencedMacros.clear();
   DeclRefs.clear();
   FilesToIndexCache.clear();
+  HeaderIsSelfContainedCache.clear();
 }
 
 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
@@ -602,8 +560,7 @@
     // Use the expansion location to get the #include header since this is
     // where the symbol is exposed.
     if (auto Header = getIncludeHeader(
-            QName, SM, *PP,
-            SM.getDecomposedExpansionLoc(ND.getLocation()).first, Opts))
+            QName, SM.getDecomposedExpansionLoc(ND.getLocation()).first))
       Include = std::move(*Header);
   }
   if (!Include.empty())
@@ -639,5 +596,63 @@
   Symbols.insert(S);
 }
 
+/// Gets a canonical include (URI of the header or <header> or "header") for
+/// header of \p FID (which should usually be the *expansion* file).
+/// Returns None if includes should not be inserted for this file.
+llvm::Optional<std::string>
+SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) {
+  const SourceManager &SM = ASTCtx->getSourceManager();
+  const FileEntry *FE = SM.getFileEntryForID(FID);
+  if (!FE || FE->getName().empty())
+    return llvm::None;
+  llvm::StringRef Filename = FE->getName();
+  // If a file is mapped by canonical headers, use that mapping, regardless
+  // of whether it's an otherwise-good header (header guards etc).
+  if (Opts.Includes) {
+    llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
+    // If we had a mapping, always use it.
+    if (Canonical.startswith("<") || Canonical.startswith("\""))
+      return Canonical.str();
+    if (Canonical != Filename)
+      return toURI(SM, Canonical, Opts);
+  }
+  if (!isSelfContainedHeader(FID)) {
+    // A .inc or .def file is often included into a real header to define
+    // symbols (e.g. LLVM tablegen files).
+    if (Filename.endswith(".inc") || Filename.endswith(".def"))
+      return getIncludeHeader(QName, SM.getFileID(SM.getIncludeLoc(FID)));
+    // Conservatively refuse to insert #includes to files without guards.
+    return llvm::None;
+  }
+  // Standard case: just insert the file itself.
+  return toURI(SM, Filename, Opts);
+}
+
+bool SymbolCollector::isSelfContainedHeader(FileID FID) {
+  // The real computation (which will be memoized).
+  auto Compute = [&] {
+    const SourceManager &SM = ASTCtx->getSourceManager();
+    const FileEntry *FE = SM.getFileEntryForID(FID);
+    if (!FE)
+      return false;
+    if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE))
+      return false;
+    std::string Err;
+    if (!DontIncludeMe.isValid(Err))
+      llvm::errs() << "err " << Err << "\n";
+    llvm::errs() << "matching \n" << SM.getBufferData(FID);
+    if (DontIncludeMe.match(SM.getBufferData(FID)))
+      return false;
+    if (!DontIncludeMe.isValid(Err))
+      llvm::errs() << "err " << Err << "\n";
+    return true;
+  };
+
+  auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
+  if (R.second)
+    R.first->second = Compute();
+  return R.first->second;
+}
+
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to