sammccall created this revision.
Herald added subscribers: usaxena95, kadircet, jfb, arphaman.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, ilya-biryukov.
Herald added a project: clang.
This is an idea to attack two problems:
- simplify string tracking, copies, comparisons. Should be worth some perf,
maybe reduce complexity in some places.
- make it easy+efficient to treat paths as case-insensitive where needed (we've
been stomping these bugs as they get reported, but there are more...)
Three main limitations I'm running into:
- It's really painful to try to convert all of clangd to use this at once, but
it's hard to measure the performance by converting only part.
- Conversion back and forth to URIs limits our wins, we're copying+re-interning
all the time. For the index, fix is to eliminate URIs, we have ideas for this.
URIs on the wire are fine, we can just slap a cache on them.
- Pulling filenames out of clang APIs (FileManager) limits our wins too.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D95824
Files:
clang-tools-extra/clangd/support/Path.cpp
clang-tools-extra/clangd/support/Path.h
Index: clang-tools-extra/clangd/support/Path.h
===================================================================
--- clang-tools-extra/clangd/support/Path.h
+++ clang-tools-extra/clangd/support/Path.h
@@ -9,7 +9,9 @@
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SUPPORT_PATH_H
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include <string>
namespace clang {
@@ -17,18 +19,67 @@
/// A typedef to represent a file path. Used solely for more descriptive
/// signatures.
-using Path = std::string;
+// using Path = std::string;
/// A typedef to represent a ref to file path. Used solely for more descriptive
/// signatures.
-using PathRef = llvm::StringRef;
+// using PathRef = llvm::StringRef;
// For platforms where paths are case-insensitive (but case-preserving),
// we need to do case-insensitive comparisons and use lowercase keys.
// FIXME: Make Path a real class with desired semantics instead.
-std::string maybeCaseFoldPath(PathRef Path);
-bool pathEqual(PathRef, PathRef);
+std::string maybeCaseFoldPath(llvm::StringRef Path);
+bool pathEqual(llvm::StringRef, llvm::StringRef);
+
+class IPath {
+ const char *Data;
+ unsigned Length;
+ unsigned Hash;
+
+ IPath(unsigned Sentinel) : Data(nullptr), Length(0), Hash(Sentinel) {}
+ struct CanonicalTag {};
+ IPath(llvm::StringRef Value, CanonicalTag);
+
+public:
+ IPath() : Data(0), Length(0), Hash(0) {}
+ explicit IPath(llvm::StringRef Value);
+ IPath &operator=(const IPath &) = default;
+ IPath &operator=(llvm::StringRef V) { return *this = IPath(V); }
+
+ bool empty() const { return !Length; }
+
+ operator llvm::StringRef() const { return llvm::StringRef(Data, Length); }
+ operator llvm::Twine() const { return llvm::Twine(llvm::StringRef(*this)); }
+ friend bool operator==(IPath L, IPath R) {
+ if (L.Data == R.Data)
+ return true;
+ if (L.Hash != R.Hash)
+ return false;
+ return pathEqual(L, R);
+ }
+ friend llvm::hash_code hash_value(IPath P) {
+ // Just mix the stored 32-bit hash, should be enough?!
+ return llvm::hash_value(P.Hash);
+ }
+
+ friend struct llvm::DenseMapInfo<IPath>;
+ friend class PathTable;
+};
+inline bool operator!=(IPath L, IPath R) { return !(L == R); }
+
+using Path = IPath;
+using PathRef = IPath;
} // namespace clangd
} // namespace clang
+namespace llvm {
+template <> struct llvm::DenseMapInfo<clang::clangd::IPath> {
+ using IPath = clang::clangd::IPath;
+ static inline IPath getEmptyKey() { return IPath(); }
+ static inline IPath getTombstoneKey() { return IPath(1); }
+ static unsigned getHashValue(IPath Val) { return Val.Hash; }
+ static bool isEqual(IPath L, clang::clangd::IPath R) { return L == R; }
+};
+} // namespace llvm
+
#endif
Index: clang-tools-extra/clangd/support/Path.cpp
===================================================================
--- clang-tools-extra/clangd/support/Path.cpp
+++ clang-tools-extra/clangd/support/Path.cpp
@@ -7,10 +7,16 @@
//===----------------------------------------------------------------------===//
#include "support/Path.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include <atomic>
+#include <linux/limits.h>
+
namespace clang {
namespace clangd {
-std::string maybeCaseFoldPath(PathRef Path) {
+std::string maybeCaseFoldPath(llvm::StringRef Path) {
#if defined(_WIN32) || defined(__APPLE__)
return Path.lower();
#else
@@ -18,7 +24,7 @@
#endif
}
-bool pathEqual(PathRef A, PathRef B) {
+bool pathEqual(llvm::StringRef A, llvm::StringRef B) {
#if defined(_WIN32) || defined(__APPLE__)
return A.equals_lower(B);
#else
@@ -26,5 +32,88 @@
#endif
}
+static unsigned pathHash(llvm::StringRef Path) {
+ llvm::SmallString<256> Upper = Path;
+ for (char &C : Upper)
+ C &= 0xdf;
+ return llvm::hash_value(Path);
+}
+
+class PathTable {
+ struct Entry {
+ IPath P;
+ std::atomic<Entry *> Next;
+ llvm::hash_code Hash;
+ };
+ const unsigned NBuckets;
+ std::atomic<Entry *> *Table;
+ std::atomic<size_t> MemoryUsage;
+
+ constexpr static std::memory_order Acquire = std::memory_order_acquire;
+ constexpr static std::memory_order Release = std::memory_order_release;
+
+public:
+ PathTable(unsigned NBuckets)
+ : NBuckets(NBuckets), Table(new std::atomic<Entry *>[NBuckets]) {}
+
+ ~PathTable() {
+ assert(MemoryUsage == 0 && "Cannot destroy nonempty PathTable");
+ delete[](Table);
+ }
+
+ const IPath &put(llvm::StringRef Key) {
+ llvm::hash_code Hash = llvm::hash_value(Key);
+ std::atomic<Entry *> *Tail = &Table[Hash % NBuckets];
+ // Scan through the bucket looking for a matching result.
+ for (;;) {
+ Entry *E = Tail->load(std::memory_order_acquire);
+ if (!E)
+ break;
+ if (E->Hash == Hash) {
+ // Don't bother comparing strings, we won't see >1M.
+ assert(Key == llvm::StringRef(E->P) && "Hash collision?");
+ return E->P;
+ }
+ Tail = &E->Next;
+ }
+ // Not found.
+ // Tail points to the null "next" pointer at the end of the chain.
+ // Allocate the data, so we can attach it to the chain.
+ unsigned BufSize = sizeof(Entry) + Key.size();
+ void *Buf = malloc(BufSize);
+ char *Data = reinterpret_cast<char *>(Buf) + sizeof(Entry);
+ memcpy(Data, Key.data(), Key.size());
+ Entry *NewEntry = new (Buf) Entry{
+ IPath(llvm::StringRef(Data, Key.size()), IPath::CanonicalTag{}),
+ {nullptr},
+ Hash,
+ };
+ // Now attempt to append it to the chain.
+ for (;;) {
+ Entry *NewTail = nullptr;
+ if (Tail->compare_exchange_weak(NewTail, NewEntry,
+ std::memory_order_acq_rel)) {
+ MemoryUsage += BufSize;
+ return NewEntry->P; // insert succeeded
+ }
+ // We lost a race against another insert...
+ if (NewTail->Hash == Hash) { // ...of the same value?
+ NewEntry->~Entry();
+ free(Buf);
+ return NewTail->P;
+ }
+ // ... or of a different value, advance and retry.
+ Tail = &NewTail->Next;
+ }
+ }
+};
+PathTable Table(1 << 20);
+
+IPath::IPath(llvm::StringRef Value) {
+ *this = Value.size() ? IPath() : Table.put(Value);
+}
+IPath::IPath(llvm::StringRef Value, CanonicalTag)
+ : Data(Value.data()), Length(Value.size()), Hash(pathHash(Value)) {}
+
} // namespace clangd
} // namespace clang
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits