sammccall created this revision.
Herald added subscribers: usaxena95, kadircet, jfb, arphaman.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, ilya-biryukov.
Herald added a project: clang.

This is an idea to attack two problems:

- simplify string tracking, copies, comparisons. Should be worth some perf, 
maybe reduce complexity in some places.
- make it easy+efficient to treat paths as case-insensitive where needed (we've 
been stomping these bugs as they get reported, but there are more...)

Three main limitations I'm running into:

- It's really painful to try to convert all of clangd to use this at once, but 
it's hard to measure the performance by converting only part.
- Conversion back and forth to URIs limits our wins, we're copying+re-interning 
all the time. For the index, fix is to eliminate URIs, we have ideas for this. 
URIs on the wire are fine, we can just slap a cache on them.
- Pulling filenames out of clang APIs (FileManager) limits our wins too.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D95824

Files:
  clang-tools-extra/clangd/support/Path.cpp
  clang-tools-extra/clangd/support/Path.h

Index: clang-tools-extra/clangd/support/Path.h
===================================================================
--- clang-tools-extra/clangd/support/Path.h
+++ clang-tools-extra/clangd/support/Path.h
@@ -9,7 +9,9 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H
 
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include <string>
 
 namespace clang {
@@ -17,18 +19,67 @@
 
 /// A typedef to represent a file path. Used solely for more descriptive
 /// signatures.
-using Path = std::string;
+// using Path = std::string;
 /// A typedef to represent a ref to file path. Used solely for more descriptive
 /// signatures.
-using PathRef = llvm::StringRef;
+// using PathRef = llvm::StringRef;
 
 // For platforms where paths are case-insensitive (but case-preserving),
 // we need to do case-insensitive comparisons and use lowercase keys.
 // FIXME: Make Path a real class with desired semantics instead.
-std::string maybeCaseFoldPath(PathRef Path);
-bool pathEqual(PathRef, PathRef);
+std::string maybeCaseFoldPath(llvm::StringRef Path);
+bool pathEqual(llvm::StringRef, llvm::StringRef);
+
+class IPath {
+  const char *Data;
+  unsigned Length;
+  unsigned Hash;
+
+  IPath(unsigned Sentinel) : Data(nullptr), Length(0), Hash(Sentinel) {}
+  struct CanonicalTag {};
+  IPath(llvm::StringRef Value, CanonicalTag);
+
+public:
+  IPath() : Data(0), Length(0), Hash(0) {}
+  explicit IPath(llvm::StringRef Value);
+  IPath &operator=(const IPath &) = default;
+  IPath &operator=(llvm::StringRef V) { return *this = IPath(V); }
+
+  bool empty() const { return !Length; }
+
+  operator llvm::StringRef() const { return llvm::StringRef(Data, Length); }
+  operator llvm::Twine() const { return llvm::Twine(llvm::StringRef(*this)); }
+  friend bool operator==(IPath L, IPath R) {
+    if (L.Data == R.Data)
+      return true;
+    if (L.Hash != R.Hash)
+      return false;
+    return pathEqual(L, R);
+  }
+  friend llvm::hash_code hash_value(IPath P) {
+    // Just mix the stored 32-bit hash, should be enough?!
+    return llvm::hash_value(P.Hash);
+  }
+
+  friend struct llvm::DenseMapInfo<IPath>;
+  friend class PathTable;
+};
+inline bool operator!=(IPath L, IPath R) { return !(L == R); }
+
+using Path = IPath;
+using PathRef = IPath;
 
 } // namespace clangd
 } // namespace clang
 
+namespace llvm {
+template <> struct llvm::DenseMapInfo<clang::clangd::IPath> {
+  using IPath = clang::clangd::IPath;
+  static inline IPath getEmptyKey() { return IPath(); }
+  static inline IPath getTombstoneKey() { return IPath(1); }
+  static unsigned getHashValue(IPath Val) { return Val.Hash; }
+  static bool isEqual(IPath L, clang::clangd::IPath R) { return L == R; }
+};
+} // namespace llvm
+
 #endif
Index: clang-tools-extra/clangd/support/Path.cpp
===================================================================
--- clang-tools-extra/clangd/support/Path.cpp
+++ clang-tools-extra/clangd/support/Path.cpp
@@ -7,10 +7,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "support/Path.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include <atomic>
+#include <linux/limits.h>
+
 namespace clang {
 namespace clangd {
 
-std::string maybeCaseFoldPath(PathRef Path) {
+std::string maybeCaseFoldPath(llvm::StringRef Path) {
 #if defined(_WIN32) || defined(__APPLE__)
   return Path.lower();
 #else
@@ -18,7 +24,7 @@
 #endif
 }
 
-bool pathEqual(PathRef A, PathRef B) {
+bool pathEqual(llvm::StringRef A, llvm::StringRef B) {
 #if defined(_WIN32) || defined(__APPLE__)
   return A.equals_lower(B);
 #else
@@ -26,5 +32,88 @@
 #endif
 }
 
+static unsigned pathHash(llvm::StringRef Path) {
+  llvm::SmallString<256> Upper = Path;
+  for (char &C : Upper)
+    C &= 0xdf;
+  return llvm::hash_value(Path);
+}
+
+class PathTable {
+  struct Entry {
+    IPath P;
+    std::atomic<Entry *> Next;
+    llvm::hash_code Hash;
+  };
+  const unsigned NBuckets;
+  std::atomic<Entry *> *Table;
+  std::atomic<size_t> MemoryUsage;
+
+  constexpr static std::memory_order Acquire = std::memory_order_acquire;
+  constexpr static std::memory_order Release = std::memory_order_release;
+
+public:
+  PathTable(unsigned NBuckets)
+      : NBuckets(NBuckets), Table(new std::atomic<Entry *>[NBuckets]) {}
+
+  ~PathTable() {
+    assert(MemoryUsage == 0 && "Cannot destroy nonempty PathTable");
+    delete[](Table);
+  }
+
+  const IPath &put(llvm::StringRef Key) {
+    llvm::hash_code Hash = llvm::hash_value(Key);
+    std::atomic<Entry *> *Tail = &Table[Hash % NBuckets];
+    // Scan through the bucket looking for a matching result.
+    for (;;) {
+      Entry *E = Tail->load(std::memory_order_acquire);
+      if (!E)
+        break;
+      if (E->Hash == Hash) {
+        // Don't bother comparing strings, we won't see >1M.
+        assert(Key == llvm::StringRef(E->P) && "Hash collision?");
+        return E->P;
+      }
+      Tail = &E->Next;
+    }
+    // Not found.
+    // Tail points to the null "next" pointer at the end of the chain.
+    // Allocate the data, so we can attach it to the chain.
+    unsigned BufSize = sizeof(Entry) + Key.size();
+    void *Buf = malloc(BufSize);
+    char *Data = reinterpret_cast<char *>(Buf) + sizeof(Entry);
+    memcpy(Data, Key.data(), Key.size());
+    Entry *NewEntry = new (Buf) Entry{
+        IPath(llvm::StringRef(Data, Key.size()), IPath::CanonicalTag{}),
+        {nullptr},
+        Hash,
+    };
+    // Now attempt to append it to the chain.
+    for (;;) {
+      Entry *NewTail = nullptr;
+      if (Tail->compare_exchange_weak(NewTail, NewEntry,
+                                      std::memory_order_acq_rel)) {
+        MemoryUsage += BufSize;
+        return NewEntry->P; // insert succeeded
+      }
+      // We lost a race against another insert...
+      if (NewTail->Hash == Hash) { // ...of the same value?
+        NewEntry->~Entry();
+        free(Buf);
+        return NewTail->P;
+      }
+      // ... or of a different value, advance and retry.
+      Tail = &NewTail->Next;
+    }
+  }
+};
+PathTable Table(1 << 20);
+
+IPath::IPath(llvm::StringRef Value) {
+  *this = Value.size() ? IPath() : Table.put(Value);
+}
+IPath::IPath(llvm::StringRef Value, CanonicalTag)
+    : Data(Value.data()), Length(Value.size()), Hash(pathHash(Value)) {}
+
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to