kadircet updated this revision to Diff 206231.
kadircet marked 9 inline comments as done.
kadircet added a comment.

- Rename SystemIncludeExtractor to QueryDriverDatabase
- Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D62804/new/

https://reviews.llvm.org/D62804

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/ClangdLSPServer.cpp
  clang-tools-extra/clangd/ClangdServer.h
  clang-tools-extra/clangd/GlobalCompilationDatabase.h
  clang-tools-extra/clangd/QueryDriverDatabase.cpp
  clang-tools-extra/clangd/test/system-include-extractor.test
  clang-tools-extra/clangd/tool/ClangdMain.cpp

Index: clang-tools-extra/clangd/tool/ClangdMain.cpp
===================================================================
--- clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -268,6 +268,15 @@
                                     "Always used text-based completion")),
         llvm::cl::init(CodeCompleteOptions().RunParser), llvm::cl::Hidden);
 
+static llvm::cl::list<std::string> QueryDriverGlobs(
+    "query-driver",
+    llvm::cl::desc(
+        "Comma separated list of globs for white-listing gcc-compatible "
+        "drivers that are safe to execute. Drivers matching any of these globs "
+        "will be used to extract system includes. e.g. "
+        "/usr/bin/**/clang-*,/path/to/repo/**/g++-*"),
+    llvm::cl::CommaSeparated);
+
 namespace {
 
 /// \brief Supports a test URI scheme with relaxed constraints for lit tests.
@@ -521,6 +530,7 @@
     };
   }
   Opts.SuggestMissingIncludes = SuggestMissingIncludes;
+  Opts.QueryDriverGlobs = std::move(QueryDriverGlobs);
   llvm::Optional<OffsetEncoding> OffsetEncodingFromFlag;
   if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding)
     OffsetEncodingFromFlag = ForceOffsetEncoding;
Index: clang-tools-extra/clangd/test/system-include-extractor.test
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/test/system-include-extractor.test
@@ -0,0 +1,50 @@
+# RUN: rm -rf %t.dir && mkdir -p %t.dir
+
+# Generate a mock-driver that will print %temp_dir%/my/dir and
+# %temp_dir%/my/dir2 as include search paths.
+# RUN: echo '#!/bin/bash' >> %t.dir/my_driver.sh
+# RUN: echo 'echo line to ignore' >> %t.dir/my_driver.sh
+# RUN: echo 'echo \#include \<...\> search starts here:' >> %t.dir/my_driver.sh
+# RUN: echo 'echo %t.dir/my/dir/' >> %t.dir/my_driver.sh
+# RUN: echo 'echo %t.dir/my/dir2/' >> %t.dir/my_driver.sh
+# RUN: echo 'echo End of search list.' >> %t.dir/my_driver.sh
+# RUN: chmod +x %t.dir/my_driver.sh
+
+# Create header files my/dir/a.h and my/dir2/b.h
+# RUN: mkdir -p %t.dir/my/dir
+# RUN: touch %t.dir/my/dir/a.h
+# RUN: mkdir -p %t.dir/my/dir2
+# RUN: touch %t.dir/my/dir2/b.h
+
+# Generate a compile_commands.json that will query the mock driver we've
+# created. Which should add a.h and b.h into include search path.
+# RUN: echo '[{"directory": "%/t.dir", "command": "%/t.dir/my_driver.sh the-file.cpp", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json
+
+# RUN: sed -e "s|INPUT_DIR|%/t.dir|g" %s > %t.test.1
+# On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..."
+# (with the extra slash in the front), so we add it here.
+# RUN: sed -e "s|file://\([A-Z]\):/|file:///\1:/|g" %t.test.1 > %t.test
+
+# Bless the mock driver we've just created so that clangd can execute it.
+# RUN: clangd -lit-test -query-driver="**.test,**.sh" < %t.test | FileCheck -strict-whitespace %t.test
+{"jsonrpc":"2.0","id":0,"method":"initialize","params":{}}
+---
+{
+  "jsonrpc":"2.0",
+  "method":"textDocument/didOpen",
+  "params": {
+    "textDocument": {
+      "uri": "file://INPUT_DIR/the-file.cpp",
+      "languageId":"cpp",
+      "version":1,
+      "text":"#include <a.h>\n#include <b.h>"
+    }
+  }
+}
+# CHECK:   "method": "textDocument/publishDiagnostics",
+# CHECK-NEXT:   "params": {
+# CHECK-NEXT:     "diagnostics": [],
+---
+{"jsonrpc":"2.0","id":10000,"method":"shutdown"}
+---
+{"jsonrpc":"2.0","method":"exit"}
Index: clang-tools-extra/clangd/QueryDriverDatabase.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/QueryDriverDatabase.cpp
@@ -0,0 +1,265 @@
+//===--- QueryDriverDatabase.cpp ---------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Some compiler drivers have implicit search mechanism for system headers.
+// This compilation database implementation tries to extract that information by
+// executing the driver in verbose mode. gcc-compatible drivers print something
+// like:
+// ....
+// ....
+// #include <...> search starts here:
+//  /usr/lib/gcc/x86_64-linux-gnu/7/include
+//  /usr/local/include
+//  /usr/lib/gcc/x86_64-linux-gnu/7/include-fixed
+//  /usr/include/x86_64-linux-gnu
+//  /usr/include
+// End of search list.
+// ....
+// ....
+// This component parses that output and adds each path to command line args
+// provided by Base, after prepending them with -isystem. Therefore current
+// implementation would not work with a driver that is not gcc-compatible.
+//
+// First argument of the command line received from underlying compilation
+// database is used as compiler driver path. Due to this arbitrary binary
+// execution, this mechanism is not used by default and only executes binaries
+// in the paths that are explicitly whitelisted by the user.
+
+#include "GlobalCompilationDatabase.h"
+#include "Logger.h"
+#include "Path.h"
+#include "Trace.h"
+#include "clang/Driver/Types.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace {
+
+std::vector<std::string> parseDriverOutput(llvm::StringRef Output) {
+  std::vector<std::string> SystemIncludes;
+  constexpr char const *SIS = "#include <...> search starts here:";
+  constexpr char const *SIE = "End of search list.";
+  llvm::SmallVector<llvm::StringRef, 8> Lines;
+  Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+
+  auto StartIt = std::find(Lines.begin(), Lines.end(), SIS);
+  if (StartIt == Lines.end()) {
+    elog("System include extraction: start marker not found: {0}", Output);
+    return {};
+  }
+  ++StartIt;
+  const auto EndIt = std::find(StartIt, Lines.end(), SIE);
+  if (EndIt == Lines.end()) {
+    elog("System include extraction: end marker missing: {0}", Output);
+    return {};
+  }
+
+  for (llvm::StringRef Line : llvm::make_range(StartIt, EndIt)) {
+    SystemIncludes.push_back(Line.str());
+    vlog("System include extraction: adding {0}", Line);
+  }
+  return SystemIncludes;
+}
+
+std::vector<std::string> extractSystemIncludes(PathRef Driver,
+                                               llvm::StringRef Ext,
+                                               llvm::Regex &QueryDriverRegex) {
+  trace::Span Tracer("Extract system includes");
+  SPAN_ATTACH(Tracer, "driver", Driver);
+  SPAN_ATTACH(Tracer, "ext", Ext);
+
+  if (!QueryDriverRegex.match(Driver)) {
+    vlog("System include extraction: not whitelisted driver {0}", Driver);
+    return {};
+  }
+
+  if (!llvm::sys::fs::exists(Driver)) {
+    elog("System include extraction: {0} does not exist.", Driver);
+    return {};
+  }
+  if (!llvm::sys::fs::can_execute(Driver)) {
+    elog("System include extraction: {0} is not executable.", Driver);
+    return {};
+  }
+
+  llvm::SmallString<128> OutputPath;
+  auto EC = llvm::sys::fs::createTemporaryFile("system-includes", "clangd",
+                                               OutputPath);
+  if (EC) {
+    elog("System include extraction: failed to create temporary file with "
+         "error {0}",
+         EC.message());
+    return {};
+  }
+  auto CleanUp = llvm::make_scope_exit(
+      [&OutputPath]() { llvm::sys::fs::remove(OutputPath); });
+
+  llvm::Optional<llvm::StringRef> Redirects[] = {
+      {""}, llvm::StringRef(OutputPath), {""}};
+
+  auto Type = driver::types::lookupTypeForExtension(Ext);
+  if (Type == driver::types::TY_INVALID) {
+    elog("System include extraction: invalid file type for {0}", Ext);
+    return {};
+  }
+  // Should we also preserve flags like "-sysroot", "-nostdinc" ?
+  const llvm::StringRef Args[] = {"-E", "-x", driver::types::getTypeName(Type),
+                                  "-", "-v"};
+
+  int RC =
+      llvm::sys::ExecuteAndWait(Driver, Args, /*Env=*/llvm::None, Redirects);
+  if (RC) {
+    elog("System include extraction: driver execution failed with return code: "
+         "{0}",
+         llvm::to_string(RC));
+    return {};
+  }
+
+  auto BufOrError = llvm::MemoryBuffer::getFile(OutputPath);
+  if (!BufOrError) {
+    elog("System include extraction: failed to read {0} with error {1}",
+         OutputPath, BufOrError.getError().message());
+    return {};
+  }
+
+  auto Includes = parseDriverOutput(BufOrError->get()->getBuffer());
+  log("System include extractor: succesfully executed {0}, got includes: "
+      "\"{1}\"",
+      Driver, llvm::join(Includes, ", "));
+  return Includes;
+}
+
+tooling::CompileCommand &
+addSystemIncludes(tooling::CompileCommand &Cmd,
+                  llvm::ArrayRef<std::string> SystemIncludes) {
+  for (llvm::StringRef Include : SystemIncludes) {
+    // FIXME(kadircet): This doesn't work when we have "--driver-mode=cl"
+    Cmd.CommandLine.push_back("-isystem");
+    Cmd.CommandLine.push_back(Include.str());
+  }
+  return Cmd;
+}
+
+/// Converts a glob containing only ** or * into a regex.
+std::string convertGlobToRegex(llvm::StringRef Glob) {
+  std::string RegText;
+  llvm::raw_string_ostream RegStream(RegText);
+  RegStream << '^';
+  for (size_t I = 0, E = Glob.size(); I < E; ++I) {
+    if (Glob[I] == '*') {
+      if (I + 1 < E && Glob[I + 1] == '*') {
+        // Double star, accept any sequence.
+        RegStream << ".*";
+        // Also skip the second star.
+        ++I;
+      } else {
+        // Single star, accept any sequence without a slash.
+        RegStream << "[^/]*";
+      }
+    } else {
+      RegStream << llvm::Regex::escape(Glob.substr(I, 1));
+    }
+  }
+  RegStream << '$';
+  RegStream.flush();
+  return RegText;
+}
+
+/// Converts a glob containing only ** or * into a regex.
+llvm::Regex convertGlobsToRegex(llvm::ArrayRef<std::string> Globs) {
+  assert(!Globs.empty() && "Globs cannot be empty!");
+  std::vector<std::string> RegTexts;
+  RegTexts.reserve(Globs.size());
+  for (llvm::StringRef Glob : Globs)
+    RegTexts.push_back(convertGlobToRegex(Glob));
+
+  llvm::Regex Reg(llvm::join(RegTexts, "|"));
+  assert(Reg.isValid(RegTexts.front()) &&
+         "Created an invalid regex from globs");
+  return Reg;
+}
+
+/// Extracts system includes from a trusted driver by parsing the output of
+/// include search path and appends them to the commands coming from underlying
+/// compilation database.
+class QueryDriverDatabase : public GlobalCompilationDatabase {
+public:
+  QueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs,
+                      std::unique_ptr<GlobalCompilationDatabase> Base)
+      : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)),
+        Base(std::move(Base)) {
+    assert(this->Base);
+    BaseChanged =
+        this->Base->watch([this](const std::vector<std::string> &Changes) {
+          OnCommandChanged.broadcast(Changes);
+        });
+  }
+
+  llvm::Optional<tooling::CompileCommand>
+  getCompileCommand(PathRef File, ProjectInfo *PI = nullptr) const override {
+    auto Cmd = Base->getCompileCommand(File, PI);
+    if (!Cmd || Cmd->CommandLine.empty())
+      return Cmd;
+
+    llvm::SmallString<128> Driver(Cmd->CommandLine.front());
+    llvm::sys::fs::make_absolute(Cmd->Directory, Driver);
+
+    llvm::ArrayRef<std::string> SystemIncludes;
+    {
+      std::lock_guard<std::mutex> Lock(Mu);
+
+      llvm::StringRef Ext = llvm::sys::path::extension(File).trim('.');
+      auto It = DriverToIncludesCache.try_emplace({Driver, Ext});
+      if (It.second)
+        It.first->second = extractSystemIncludes(Driver, Ext, QueryDriverRegex);
+      SystemIncludes = It.first->second;
+    }
+
+    return addSystemIncludes(*Cmd, SystemIncludes);
+  }
+
+private:
+  mutable std::mutex Mu;
+  // Caches includes extracted from a driver.
+  mutable llvm::DenseMap<std::pair<StringRef, StringRef>,
+                         std::vector<std::string>>
+      DriverToIncludesCache;
+  mutable llvm::Regex QueryDriverRegex;
+
+  std::unique_ptr<GlobalCompilationDatabase> Base;
+  CommandChanged::Subscription BaseChanged;
+};
+} // namespace
+
+std::unique_ptr<GlobalCompilationDatabase>
+getQueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs,
+                       std::unique_ptr<GlobalCompilationDatabase> Base) {
+  assert(Base && "Null base to SystemIncludeExtractor");
+  if (QueryDriverGlobs.empty())
+    return Base;
+  return llvm::make_unique<QueryDriverDatabase>(QueryDriverGlobs,
+                                                std::move(Base));
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/GlobalCompilationDatabase.h
===================================================================
--- clang-tools-extra/clangd/GlobalCompilationDatabase.h
+++ clang-tools-extra/clangd/GlobalCompilationDatabase.h
@@ -91,6 +91,13 @@
   llvm::Optional<Path> CompileCommandsDir;
 };
 
+/// Extracts system include search path from drivers matching QueryDriverGlobs
+/// and adds them to the compile flags. Base may not be nullptr.
+/// Returns Base when \p QueryDriverGlobs is empty.
+std::unique_ptr<GlobalCompilationDatabase>
+getQueryDriverDatabase(llvm::ArrayRef<std::string> QueryDriverGlobs,
+                       std::unique_ptr<GlobalCompilationDatabase> Base);
+
 /// Wraps another compilation database, and supports overriding the commands
 /// using an in-memory mapping.
 class OverlayCDB : public GlobalCompilationDatabase {
Index: clang-tools-extra/clangd/ClangdServer.h
===================================================================
--- clang-tools-extra/clangd/ClangdServer.h
+++ clang-tools-extra/clangd/ClangdServer.h
@@ -124,6 +124,10 @@
         std::chrono::milliseconds(500);
 
     bool SuggestMissingIncludes = false;
+
+    /// Clangd will execute compiler drivers matching one of these globs to
+    /// fetch system include path.
+    std::vector<std::string> QueryDriverGlobs;
   };
   // Sensible default options for use in tests.
   // Features like indexing must be enabled if desired.
Index: clang-tools-extra/clangd/ClangdLSPServer.cpp
===================================================================
--- clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -9,11 +9,13 @@
 #include "ClangdLSPServer.h"
 #include "Diagnostics.h"
 #include "FormattedString.h"
+#include "GlobalCompilationDatabase.h"
 #include "Protocol.h"
 #include "SourceCode.h"
 #include "Trace.h"
 #include "URI.h"
 #include "clang/Tooling/Core/Replacement.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Support/Errc.h"
@@ -337,9 +339,13 @@
                                             ErrorCode::InvalidRequest));
   if (const auto &Dir = Params.initializationOptions.compilationDatabasePath)
     CompileCommandsDir = Dir;
-  if (UseDirBasedCDB)
+  if (UseDirBasedCDB) {
     BaseCDB = llvm::make_unique<DirectoryBasedGlobalCompilationDatabase>(
         CompileCommandsDir);
+    BaseCDB = getQueryDriverDatabase(
+        llvm::makeArrayRef(ClangdServerOpts.QueryDriverGlobs),
+        std::move(BaseCDB));
+  }
   CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags,
               ClangdServerOpts.ResourceDir);
   Server.emplace(*CDB, FSProvider, static_cast<DiagnosticsConsumer &>(*this),
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -62,6 +62,7 @@
   RIFF.cpp
   Selection.cpp
   SourceCode.cpp
+  QueryDriverDatabase.cpp
   Threading.cpp
   Trace.cpp
   TUScheduler.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to