ioeric updated this revision to Diff 130201.
ioeric marked 17 inline comments as done.
ioeric added a comment.
Herald added a reviewer: jkorous-apple.

- Addressed review comments.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D41946

Files:
  clangd/CMakeLists.txt
  clangd/URI.cpp
  clangd/URI.h
  unittests/clangd/CMakeLists.txt
  unittests/clangd/URITests.cpp

Index: unittests/clangd/URITests.cpp
===================================================================
--- /dev/null
+++ unittests/clangd/URITests.cpp
@@ -0,0 +1,181 @@
+//===-- URITests.cpp  ---------------------------------*- C++ -*-----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "URI.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using ::testing::AllOf;
+
+MATCHER_P(Scheme, S, "") { return arg.scheme() == S; }
+MATCHER_P(Authority, A, "") { return arg.authority() == A; }
+MATCHER_P(Body, B, "") { return arg.body() == B; }
+
+std::string decodeOrDie(llvm::StringRef S) {
+  auto D = percentDecode(S);
+  if (!D) // Must abort in release builds too, so no llvm_unreachable here.
+    llvm::report_fatal_error(D.takeError());
+  return *D;
+}
+
+TEST(PercentEncodingTest, Encode) {
+  EXPECT_EQ(percentEncode("a/b/c"), "a/b/c");
+  EXPECT_EQ(percentEncode("a!b;c~"), "a%21b%3bc~");
+}
+
+TEST(PercentEncodingTest, Decode) {
+  EXPECT_EQ(decodeOrDie("a/b/c"), "a/b/c");
+  EXPECT_EQ(decodeOrDie("a%21b%3ac~"), "a!b:c~");
+}
+
+// Test-only scheme: every file lives under a "/test-root/" directory, and the
+// URI body is the file's path relative to that directory.
+// So the URI of "/some-dir/test-root/x/y/z" is "test:x/y/z".
+class TestScheme : public URIScheme {
+public:
+  static const char *Scheme;
+
+  static const char *TestRoot;
+
+  // Prefix of CurrentFile up to and including TestRoot, followed by Body.
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef Body,
+                  llvm::StringRef CurrentFile) const override {
+    auto Pos = CurrentFile.find(TestRoot);
+    assert(Pos != llvm::StringRef::npos);
+    auto RootEnd = Pos + llvm::StringRef(TestRoot).size();
+    return (CurrentFile.substr(0, RootEnd) + Body).str();
+  }
+
+  // "<Scheme>:" followed by whatever comes after TestRoot in AbsolutePath.
+  llvm::Expected<std::string>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    auto Pos = AbsolutePath.find(TestRoot);
+    assert(Pos != llvm::StringRef::npos);
+    auto RootEnd = Pos + llvm::StringRef(TestRoot).size(); // End of TestRoot.
+    return (llvm::Twine(Scheme) + ":" + AbsolutePath.substr(RootEnd)).str();
+  }
+};
+
+const char *TestScheme::Scheme = "test";
+const char *TestScheme::TestRoot = "/test-root/";
+
+static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
+
+std::string createOrDie(llvm::StringRef AbsolutePath,
+                        llvm::StringRef Scheme = "file") {
+  auto Uri = FileURI::create(AbsolutePath, Scheme);
+  if (!Uri) // Must abort in release builds too, so no llvm_unreachable here.
+    llvm::report_fatal_error(Uri.takeError());
+  return *Uri;
+}
+
+// Parses \p Uri into a FileURI, aborting the test binary with the parse
+// error (in release builds too) if the string is not a well-formed URI.
+FileURI parseOrDie(llvm::StringRef Uri) {
+  auto U = FileURI::parse(Uri);
+  if (!U)
+    llvm::report_fatal_error(U.takeError());
+  return *U;
+}
+
+std::string resolveOrDie(const FileURI &U, llvm::StringRef CurrentFile = "") {
+  auto Path = FileURI::resolve(U, CurrentFile);
+  if (!Path) // Must abort in release builds too, so no llvm_unreachable here.
+    llvm::report_fatal_error(Path.takeError());
+  return *Path;
+}
+
+TEST(URITest, Create) {
+  EXPECT_THAT(createOrDie("/x/y/z"), "file:/x/y/z");
+  EXPECT_THAT(createOrDie("/(x)/y/\\ z"), "file:/%28x%29/y/%5c%20z");
+}
+
+TEST(URITest, FailedCreate) {
+  auto Uri = FileURI::create("/x/y/z", "no");
+  EXPECT_FALSE(static_cast<bool>(Uri));
+  llvm::consumeError(Uri.takeError());
+
+  // Path has to be absolute.
+  Uri = FileURI::create("x/y/z");
+  EXPECT_FALSE(static_cast<bool>(Uri));
+  llvm::consumeError(Uri.takeError());
+}
+
+TEST(URITest, Parse) {
+  EXPECT_THAT(parseOrDie("file://auth//x/y/z"),
+              AllOf(Scheme("file"), Authority("auth"), Body("/x/y/z")));
+
+  EXPECT_THAT(parseOrDie("file://au%3dth//%28x%29/y/%5c%20z"),
+              AllOf(Scheme("file"), Authority("au=th"), Body("/(x)/y/\\ z")));
+
+  EXPECT_THAT(parseOrDie("file:/%28x%29/y/%5c%20z"),
+              AllOf(Scheme("file"), Authority(""), Body("/(x)/y/\\ z")));
+}
+
+TEST(URITest, ParseFailed) {
+  auto FailedParse = [](llvm::StringRef U) {
+    auto URI = FileURI::parse(U); // Parse the argument, not a fixed string.
+    if (!URI) {
+      llvm::consumeError(URI.takeError());
+      return true;
+    }
+    return false;
+  };
+
+  // Expect ':' in URI.
+  EXPECT_TRUE(FailedParse("file//x/y/z"));
+  // Expect two bytes after %.
+  EXPECT_TRUE(FailedParse("file://x/y/z%2"));
+  // Empty URI, empty scheme, empty body.
+  EXPECT_TRUE(FailedParse(""));
+  EXPECT_TRUE(FailedParse(":/a/b/c"));
+  EXPECT_TRUE(FailedParse("s:"));
+  // Incomplete.
+  EXPECT_TRUE(FailedParse("x:"));
+  EXPECT_TRUE(FailedParse("x://a"));
+  // Empty authority.
+  EXPECT_TRUE(FailedParse("file:////x/y/z"));
+}
+
+TEST(URITest, Resolve) {
+  EXPECT_EQ(resolveOrDie(parseOrDie("file:/a/b/c")), "/a/b/c");
+  EXPECT_EQ(resolveOrDie(parseOrDie("file://auth//a/b/c")), "/a/b/c");
+  EXPECT_EQ(resolveOrDie(parseOrDie("test:a/b/c"), "/dir/test-root/x/y/z"),
+            "/dir/test-root/a/b/c");
+
+  EXPECT_THAT(resolveOrDie(parseOrDie("file://au%3dth//%28x%29/y/%5c%20z")),
+              "/(x)/y/\\ z");
+}
+
+TEST(URITest, ResolveFailed) {
+  auto FailedResolve = [](llvm::StringRef Uri) {
+    auto Path = FileURI::resolve(parseOrDie(Uri));
+    if (!Path) {
+      llvm::consumeError(Path.takeError());
+      return true;
+    }
+    return false;
+  };
+
+  // Invalid scheme.
+  EXPECT_TRUE(FailedResolve("no:/a/b/c"));
+  // File path needs to be absolute.
+  EXPECT_TRUE(FailedResolve("file:a/b/c"));
+  // File path needs to be absolute.
+  EXPECT_TRUE(FailedResolve("file://c/x/y/z"));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: unittests/clangd/CMakeLists.txt
===================================================================
--- unittests/clangd/CMakeLists.txt
+++ unittests/clangd/CMakeLists.txt
@@ -18,6 +18,7 @@
   FuzzyMatchTests.cpp
   IndexTests.cpp
   JSONExprTests.cpp
+  URITests.cpp
   TestFS.cpp
   TraceTests.cpp
   SourceCodeTests.cpp
Index: clangd/URI.h
===================================================================
--- /dev/null
+++ clangd/URI.h
@@ -0,0 +1,102 @@
+//===--- URI.h - File URIs with schemes --------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Registry.h"
+
+namespace clang {
+namespace clangd {
+
+/// A URI describes the location of a source file.
+/// In the simplest case, this is a "file" URI that directly encodes the
+/// absolute path to a file. More abstract cases are possible: a shared index
+/// service might expose repo:// URIs that are relative to the source control
+/// root.
+class FileURI {
+public:
+  /// \brief Returns the percent-decoded scheme.
+  llvm::StringRef scheme() const { return Scheme; }
+  /// \brief Returns the percent-decoded authority (empty if there is none).
+  llvm::StringRef authority() const { return Authority; }
+  /// \brief Returns the percent-decoded body.
+  llvm::StringRef body() const { return Body; }
+
+  /// \brief Creates a URI string for a file in the given scheme. \p Scheme
+  /// must be registered in the URISchemeRegistry.
+  static llvm::Expected<std::string> create(llvm::StringRef AbsolutePath,
+                                            llvm::StringRef Scheme = "file");
+
+  /// \brief Parses a URI string of the form "<scheme>:[//<authority>/]<body>".
+  /// The "//<authority>/" part is optional; all components are decoded.
+  static llvm::Expected<FileURI> parse(llvm::StringRef Uri);
+
+  /// \brief Resolves the absolute path of \p U with the first matching scheme
+  /// registered; \p CurrentFile is passed on to that scheme.
+  static llvm::Expected<std::string> resolve(const FileURI &U,
+                                             llvm::StringRef CurrentFile = "");
+
+  friend bool operator==(const FileURI &LHS, const FileURI &RHS) {
+    return std::tie(LHS.Scheme, LHS.Authority, LHS.Body) ==
+           std::tie(RHS.Scheme, RHS.Authority, RHS.Body);
+  }
+
+private:
+  FileURI() = default; // Private: instances are obtained through parse().
+
+  std::string Scheme;
+  std::string Authority;
+  std::string Body;
+};
+
+/// \brief URIScheme is an extension point for teaching clangd to recognize a
+/// custom URI scheme. This is expected to be implemented and exposed via the
+/// URISchemeRegistry. Users are not expected to use URIScheme directly.
+///
+/// Different codebases/projects can use different file schemes, and clangd
+/// interprets a URI according to its scheme. For example, a remote symbol index
+/// may provide paths relative to a project root directory; clangd then has to
+/// combine such a path with the execution environment (e.g. working/build
+/// directory) in order to get a file path in the file system.
+class URIScheme {
+public:
+  virtual ~URIScheme() = default;
+
+  /// \brief Returns the absolute path of the file corresponding to the URI body
+  /// in the file system. \p CurrentFile is the file from which the request is
+  /// issued. This is needed because the same URI in different workspaces may
+  /// correspond to different files.
+  virtual llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef Body, llvm::StringRef CurrentFile) const = 0;
+
+  /// \brief Returns the URI string in this scheme for \p AbsolutePath.
+  virtual llvm::Expected<std::string>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const = 0;
+};
+
+/// \brief Encodes a string according to percent-encoding.
+/// - Unreserved characters are not escaped.
+/// - Reserved characters are always escaped, with exceptions like '/'.
+/// - All other characters are escaped.
+std::string percentEncode(llvm::StringRef Content);
+
+/// \brief Decodes a string according to percent-encoding.
+llvm::Expected<std::string> percentDecode(llvm::StringRef Content);
+
+/// By default, a "file" scheme is supported where URI paths are always absolute
+/// in the file system.
+typedef llvm::Registry<URIScheme> URISchemeRegistry;
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
Index: clangd/URI.cpp
===================================================================
--- /dev/null
+++ clangd/URI.cpp
@@ -0,0 +1,191 @@
+//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "URI.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
+#include <iomanip>
+#include <sstream>
+
+LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
+
+namespace clang {
+namespace clangd {
+namespace {
+
+inline llvm::Error make_string_error(const llvm::Twine &Message) {
+  return llvm::make_error<llvm::StringError>(Message,
+                                             llvm::inconvertibleErrorCode());
+}
+
+/// \brief This manages file paths in the file system. All paths in the scheme
+/// are absolute (with leading '/').
+class FileSystemScheme : public URIScheme {
+public:
+  static const char *Scheme;
+
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef Body,
+                  llvm::StringRef /*CurrentFile*/) const override {
+    if (!Body.startswith("/"))
+      return make_string_error(
+          "File scheme: expect body to be an absolute path starting with '/'");
+    // For Windows paths, e.g. "/X:...": drop the '/' in front of the drive.
+    if (Body.size() > 2 && Body[0] == '/' && Body[2] == ':')
+      Body.consume_front("/");
+    return llvm::sys::path::convert_to_slash(Body);
+  }
+
+  llvm::Expected<std::string>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    using namespace llvm::sys;
+
+    if (!AbsolutePath.startswith("/"))
+      return make_string_error(
+          "File scheme: An AbsolutePath must start with '/'.");
+    std::string Body;
+    // For Windows paths e.g. X: -- NOTE(review): rejected by the check above.
+    if (AbsolutePath.size() > 1 && AbsolutePath[1] == ':')
+      Body = "/";
+    Body += path::convert_to_slash(AbsolutePath, path::Style::posix);
+    return (llvm::Twine(Scheme) + ":" + percentEncode(Body)).str();
+  }
+};
+
+const char *FileSystemScheme::Scheme = "file";
+
+static URISchemeRegistry::Add<FileSystemScheme>
+    X(FileSystemScheme::Scheme,
+      "URI scheme for absolute paths in the file system.");
+
+// Instantiates the first registered scheme whose name equals \p Scheme, or
+// returns an error if no registered scheme has that name.
+llvm::Expected<std::unique_ptr<URIScheme>>
+findSchemeByName(llvm::StringRef Scheme) {
+  for (const auto &Entry : URISchemeRegistry::entries()) {
+    if (Entry.getName() == Scheme)
+      return Entry.instantiate();
+  }
+  return make_string_error("Can't find scheme: " + Scheme);
+}
+
+constexpr static char Unreserved[] = {
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b',
+    'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+    'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4',
+    '5', '6', '7', '8', '9', '0', '-', '_', '.', '~',
+};
+
+inline std::string percentize(char C) {
+  std::ostringstream OS;
+  OS << "%" << std::setfill('0') << std::setw(2) << std::hex
+     << static_cast<int>(static_cast<unsigned char>(C)); // No sign extension.
+  return OS.str();
+}
+
+std::vector<std::string> createEncodeMap() {
+  std::vector<std::string> Result(256, ""); // One slot per byte value.
+  for (char C : Unreserved)
+    Result[static_cast<unsigned char>(C)] = C;
+  for (int i = 0; i < 256; i++)
+    if (Result[i].empty())
+      Result[i] = percentize(static_cast<char>(i));
+  // Avoid escaping '/' to save URI readability.
+  Result['/'] = '/';
+  return Result;
+}
+
+} // namespace
+
+std::string percentEncode(llvm::StringRef Content) {
+  static const std::vector<std::string> EncodeMap = createEncodeMap();
+  std::string Result;
+  for (unsigned char C : Content) // Unsigned, so bytes >= 0x80 index 128..255.
+    Result += EncodeMap[C];
+  return Result;
+}
+
+llvm::Expected<std::string> percentDecode(llvm::StringRef Content) {
+  std::string Result;
+  for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
+    if (*I != '%') {
+      Result += *I; // Literal character: copied through unchanged.
+      continue;
+    }
+    if (I + 1 == E || I + 2 == E)
+      return make_string_error("Expect two characters after '%' sign: " +
+                               Content);
+    // Decode the two characters following '%' as one hexadecimal byte.
+    const char Buf[3] = {I[1], I[2], '\0'};
+    I += 2;
+    Result += static_cast<char>(strtoul(Buf, nullptr, 16));
+  }
+  return Result;
+}
+
+llvm::Expected<FileURI> FileURI::parse(llvm::StringRef Uri) {
+  FileURI U;
+  llvm::StringRef OrigUri = Uri; // Kept intact for error messages.
+
+  auto Pos = Uri.find(':'); // The scheme is everything before the first ':'.
+  if (Pos == llvm::StringRef::npos)
+    return make_string_error("Expect ':' in a URI: " + OrigUri);
+  auto Decoded = percentDecode(Uri.substr(0, Pos));
+  if (!Decoded)
+    return Decoded.takeError();
+  U.Scheme = *Decoded;
+
+  Uri = Uri.substr(Pos + 1);
+  if (Uri.consume_front("//")) { // "//" announces a non-empty authority.
+    Pos = Uri.find('/'); // The authority runs up to the next '/'.
+    if (Pos == llvm::StringRef::npos)
+      return make_string_error("Expect '/' after a URI authority: " + OrigUri);
+
+    Decoded = percentDecode(Uri.substr(0, Pos));
+    if (!Decoded)
+      return Decoded.takeError();
+    if (Decoded->empty())
+      return make_string_error(
+          "'//' is present after scheme while authority is not provided: " +
+          OrigUri);
+    U.Authority = *Decoded;
+    Uri = Uri.substr(Pos + 1); // Also drops the '/' that ended the authority.
+  }
+  Decoded = percentDecode(Uri); // Whatever remains is the body.
+  if (!Decoded)
+    return Decoded.takeError();
+  U.Body = *Decoded;
+
+  if (U.Scheme.empty() || U.Body.empty())
+    return make_string_error("Scheme and body must be provided in URI: " +
+                             OrigUri);
+
+  return U;
+}
+
+llvm::Expected<std::string> FileURI::create(llvm::StringRef AbsolutePath,
+                                            llvm::StringRef Scheme) {
+  auto S = findSchemeByName(Scheme); // Error if no such scheme is registered.
+  if (!S)
+    return S.takeError();
+  return S->get()->uriFromAbsolutePath(AbsolutePath); // Scheme builds the URI.
+}
+
+llvm::Expected<std::string> FileURI::resolve(const FileURI &Uri,
+                                             llvm::StringRef CurrentFile) {
+  auto S = findSchemeByName(Uri.Scheme); // Error if scheme is not registered.
+  if (!S)
+    return S.takeError();
+  return S->get()->getAbsolutePath(Uri.Body, CurrentFile); // Scheme decides.
+}
+
+} // namespace clangd
+} // namespace clang
Index: clangd/CMakeLists.txt
===================================================================
--- clangd/CMakeLists.txt
+++ clangd/CMakeLists.txt
@@ -21,6 +21,7 @@
   ProtocolHandlers.cpp
   SourceCode.cpp
   Trace.cpp
+  URI.cpp
   XRefs.cpp
   index/FileIndex.cpp
   index/Index.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to