https://github.com/justincady updated https://github.com/llvm/llvm-project/pull/180285
>From 991944ef6aab28aa7de7f2ff3a4c54cca4804d4d Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 14:51:23 -0500 Subject: [PATCH 01/18] Add --background-index-path-mappings option Add the option and corresponding storage. Mimic the existing path mappings (used for remote indexing) as much as possible. --- clang-tools-extra/clangd/ClangdServer.h | 6 ++++++ clang-tools-extra/clangd/tool/ClangdMain.cpp | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index 3ffaf67553dce..e4a52ff682002 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -17,6 +17,7 @@ #include "GlobalCompilationDatabase.h" #include "Hover.h" #include "ModulesBuilder.h" +#include "PathMapping.h" #include "Protocol.h" #include "SemanticHighlighting.h" #include "TUScheduler.h" @@ -199,6 +200,11 @@ class ClangdServer { /// regions in the document. bool PublishInactiveRegions = false; + /// Path mappings applied to background index files on disk. Used to enable + /// sharing of indexes when the client path differs from the path of index + /// generation. + PathMappings BackgroundIndexPathMappings; + explicit operator TUScheduler::Options() const; }; // Sensible default options for use in tests. diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 54af3662470db..f702db03907a1 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -435,6 +435,16 @@ opt<bool> EnableTestScheme{ Hidden, }; +opt<std::string> BackgroundIndexPathMappings{ + "background-index-path-mappings", + cat(Protocol), + desc("Translate clients paths prior to writing background index files to " + "disk. Enables sharing of background index files between clients. " + "Format is identical to --path-mappings. " + "e.g. 
/local/workspace=/TOKEN/workspace"), + init(""), +}; + opt<std::string> PathMappingsArg{ "path-mappings", cat(Protocol), @@ -937,6 +947,15 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var #endif Opts.BackgroundIndex = EnableBackgroundIndex; Opts.BackgroundIndexPriority = BackgroundIndexPriority; + if (!BackgroundIndexPathMappings.empty()) { + auto Mappings = parsePathMappings(BackgroundIndexPathMappings); + if (!Mappings) { + elog("Invalid --background-index-path-mappings: {0}", + Mappings.takeError()); + return 1; + } + Opts.BackgroundIndexPathMappings = std::move(*Mappings); + } Opts.ReferencesLimit = ReferencesLimit; Opts.Rename.LimitFiles = RenameFileLimit; auto PAI = createProjectAwareIndex( >From 0faac6171f9332a9d246847cd49d2918521b64a9 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 14:55:26 -0500 Subject: [PATCH 02/18] Pass mappings to createDiskBackedStorageFactory --- clang-tools-extra/clangd/ClangdServer.cpp | 3 ++- clang-tools-extra/clangd/index/Background.h | 5 ++++- clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index f1a87dd12d905..b1bcd975a1346 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -262,7 +262,8 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, BackgroundIdx = std::make_unique<BackgroundIndex>( TFS, CDB, BackgroundIndexStorage::createDiskBackedStorageFactory( - [&CDB](llvm::StringRef File) { return CDB.getProjectInfo(File); }), + [&CDB](llvm::StringRef File) { return CDB.getProjectInfo(File); }, + Opts.BackgroundIndexPathMappings), std::move(BGOpts)); AddIndex(BackgroundIdx.get()); } diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 448e911201575..8bbdf72457c0e 100644 --- 
a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_BACKGROUND_H #include "GlobalCompilationDatabase.h" +#include "PathMapping.h" #include "SourceCode.h" #include "index/BackgroundRebuild.h" #include "index/FileIndex.h" @@ -61,8 +62,10 @@ class BackgroundIndexStorage { // CDBDirectory + ".cache/clangd/index/" as the folder to save shards. // CDBDirectory is the first directory containing a CDB in parent directories // of a file, or user cache directory if none was found, e.g. stdlib headers. + // If Mappings are given, paths are remapped before shards are saved to disk. static Factory createDiskBackedStorageFactory( - std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo); + std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, + PathMappings Mappings); }; // A priority queue of tasks which can be run on (external) worker threads. diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 470be79590863..048deb4db9332 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "GlobalCompilationDatabase.h" +#include "PathMapping.h" #include "index/Background.h" #include "support/Logger.h" #include "support/Path.h" @@ -150,7 +151,8 @@ class DiskBackedIndexStorageManager { BackgroundIndexStorage::Factory BackgroundIndexStorage::createDiskBackedStorageFactory( - std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo) { + std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, + PathMappings Mappings) { return DiskBackedIndexStorageManager(std::move(GetProjectInfo)); } >From 1b91038430041871a72e5f209057919d3b663765 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email 
protected]> Date: Fri, 6 Feb 2026 14:59:47 -0500 Subject: [PATCH 03/18] Pass background mappings to DiskBackedIndexStorage --- .../clangd/index/BackgroundIndexStorage.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 048deb4db9332..3e133fadf9844 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -37,10 +37,12 @@ std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, // Uses disk as a storage for index shards. class DiskBackedIndexStorage : public BackgroundIndexStorage { std::string DiskShardRoot; + PathMappings Mappings; public: // Creates `DiskShardRoot` and any parents during construction. - DiskBackedIndexStorage(llvm::StringRef Directory) : DiskShardRoot(Directory) { + DiskBackedIndexStorage(llvm::StringRef Directory, PathMappings Mappings) + : DiskShardRoot(Directory), Mappings(std::move(Mappings)) { std::error_code OK; std::error_code EC = llvm::sys::fs::create_directories(DiskShardRoot); if (EC != OK) { @@ -107,9 +109,11 @@ class NullStorage : public BackgroundIndexStorage { class DiskBackedIndexStorageManager { public: DiskBackedIndexStorageManager( - std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo) + std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, + PathMappings Mappings) : IndexStorageMapMu(std::make_unique<std::mutex>()), - GetProjectInfo(std::move(GetProjectInfo)) { + GetProjectInfo(std::move(GetProjectInfo)), + Mappings(std::move(Mappings)) { llvm::SmallString<128> FallbackDir; if (llvm::sys::path::cache_directory(FallbackDir)) llvm::sys::path::append(FallbackDir, "clangd", "index"); @@ -136,7 +140,7 @@ class DiskBackedIndexStorageManager { elog("Tried to create storage for empty directory!"); return std::make_unique<NullStorage>(); } - return 
std::make_unique<DiskBackedIndexStorage>(CDBDirectory); + return std::make_unique<DiskBackedIndexStorage>(CDBDirectory, Mappings); } Path FallbackDir; @@ -145,6 +149,7 @@ class DiskBackedIndexStorageManager { std::unique_ptr<std::mutex> IndexStorageMapMu; std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo; + PathMappings Mappings; }; } // namespace @@ -153,7 +158,8 @@ BackgroundIndexStorage::Factory BackgroundIndexStorage::createDiskBackedStorageFactory( std::function<std::optional<ProjectInfo>(PathRef)> GetProjectInfo, PathMappings Mappings) { - return DiskBackedIndexStorageManager(std::move(GetProjectInfo)); + return DiskBackedIndexStorageManager(std::move(GetProjectInfo), + std::move(Mappings)); } } // namespace clangd >From 02641754272b1086bdf4a2a8eab98933901bf640 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:03:48 -0500 Subject: [PATCH 04/18] Add mapping lambdas to DiskBackedIndexStorage The lambda functions will be used in a later commit to transform paths prior to serializing (and deserializing) the background index files. The goal is to reuse existing PathMapping infrastructure as much as possible for background indexing (e.g. PathMapping::Direction being reused). --- .../clangd/index/BackgroundIndexStorage.cpp | 22 +++++++++++++++++++ .../clangd/index/Serialization.h | 4 ++++ 2 files changed, 26 insertions(+) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 3e133fadf9844..1b5e6b3d3f732 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -25,6 +25,15 @@ namespace clang { namespace clangd { namespace { +// Apply path mapping to file URI. Return original URI if no mapping applies. 
+std::string applyPathMappingToURI(llvm::StringRef URI, + PathMapping::Direction Direction, + const PathMappings &Mappings) { + if (auto Mapped = doPathMapping(URI, Direction, Mappings)) + return std::move(*Mapped); + return URI.str(); +} + std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, llvm::StringRef FilePath) { llvm::SmallString<128> ShardRootSS(ShardRoot); @@ -38,11 +47,24 @@ std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, class DiskBackedIndexStorage : public BackgroundIndexStorage { std::string DiskShardRoot; PathMappings Mappings; + URITransform LoadTransform; + URITransform StoreTransform; public: // Creates `DiskShardRoot` and any parents during construction. DiskBackedIndexStorage(llvm::StringRef Directory, PathMappings Mappings) : DiskShardRoot(Directory), Mappings(std::move(Mappings)) { + // Background path mappings are specified as /local/path=/canonical/path. + // During load we transform from canonical to local (ServerToClient). + LoadTransform = [this](llvm::StringRef URI) { + return applyPathMappingToURI(URI, PathMapping::Direction::ServerToClient, + this->Mappings); + }; + // During store we transform from local to canonical (ClientToServer). 
+ StoreTransform = [this](llvm::StringRef URI) { + return applyPathMappingToURI(URI, PathMapping::Direction::ClientToServer, + this->Mappings); + }; std::error_code OK; std::error_code EC = llvm::sys::fs::create_directories(DiskShardRoot); if (EC != OK) { diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index bf8e036afcb6c..0942155d8e898 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -28,12 +28,16 @@ #include "index/Index.h" #include "index/Symbol.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/Support/Error.h" #include <optional> namespace clang { namespace clangd { +// Used to remap URIs during serialization/deserialization +using URITransform = llvm::unique_function<std::string(llvm::StringRef) const>; + enum class IndexFileFormat { RIFF, // Versioned binary format, suitable for production use. YAML, // Human-readable format, suitable for experiments and debugging. >From 211fea6e33a59e9fa05169d327524b46dd86ac83 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:32:43 -0500 Subject: [PATCH 05/18] Use the mapped URI to compute shard filenames Because the background index files in this mode are intended to be shared between clients with their own unique absolute paths, the shard filenames must be hashed on the _mapped_ URI. This naming only differs when `--background-index-path-mappings` is active. Otherwise, existing clangd background indexes would be invalidated (which we do not want!). A new unit test ensures that hashing is unchanged for the default, non-mapping case. 
--- .../clangd/index/BackgroundIndexStorage.cpp | 25 +++++++++---- .../clangd/unittests/BackgroundIndexTests.cpp | 36 +++++++++++++++++++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 1b5e6b3d3f732..eb7ab6f37345f 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -8,6 +8,7 @@ #include "GlobalCompilationDatabase.h" #include "PathMapping.h" +#include "URI.h" #include "index/Background.h" #include "support/Logger.h" #include "support/Path.h" @@ -35,11 +36,22 @@ std::string applyPathMappingToURI(llvm::StringRef URI, } std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, - llvm::StringRef FilePath) { + llvm::StringRef FilePath, + const PathMappings &Mappings) { + std::string HashInput; + if (Mappings.empty()) { + HashInput = FilePath.str(); + } else { + // Hash the mapped URI so that shards are consistently named regardless of + // the path of the generating client + std::string FileURI = URI::createFile(FilePath).toString(); + HashInput = applyPathMappingToURI( + FileURI, PathMapping::Direction::ClientToServer, Mappings); + } llvm::SmallString<128> ShardRootSS(ShardRoot); - llvm::sys::path::append(ShardRootSS, llvm::sys::path::filename(FilePath) + - "." + llvm::toHex(digest(FilePath)) + - ".idx"); + llvm::sys::path::append(ShardRootSS, + llvm::sys::path::filename(FilePath) + "." 
+ + llvm::toHex(digest(HashInput)) + ".idx"); return std::string(ShardRootSS); } @@ -85,7 +97,7 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { std::unique_ptr<IndexFileIn> loadShard(llvm::StringRef ShardIdentifier) const override { const std::string ShardPath = - getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + getShardPathFromFilePath(DiskShardRoot, ShardIdentifier, Mappings); auto Buffer = llvm::MemoryBuffer::getFile(ShardPath); if (!Buffer) return nullptr; @@ -100,7 +112,8 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { llvm::Error storeShard(llvm::StringRef ShardIdentifier, IndexFileOut Shard) const override { - auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + auto ShardPath = + getShardPathFromFilePath(DiskShardRoot, ShardIdentifier, Mappings); return llvm::writeToOutput(ShardPath, [&Shard](llvm::raw_ostream &OS) { OS << Shard; return llvm::Error::success(); diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index 0eb4acf0469b7..7f99db011acda 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -2,6 +2,8 @@ #include "CompileCommands.h" #include "Config.h" #include "Headers.h" +#include "PathMapping.h" +#include "SourceCode.h" #include "SyncAPI.h" #include "TestFS.h" #include "TestTU.h" @@ -11,6 +13,8 @@ #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -1031,5 +1035,37 @@ TEST(BackgroundIndex, Profile) { UnorderedElementsAre(Pair("slabs", _), Pair("index", _))); } +// Verify shard filenames are unchanged when no path mappings are used +TEST(BackgroundIndexStorage, 
ShardFilenameUnchangedWithoutPathMappings) { + llvm::SmallString<256> TempDir; + ASSERT_FALSE(llvm::sys::fs::createUniqueDirectory("clangd-test", TempDir)); + llvm::scope_exit Cleanup([&] { llvm::sys::fs::remove_directories(TempDir); }); + + auto Factory = BackgroundIndexStorage::createDiskBackedStorageFactory( + [&](PathRef) -> std::optional<ProjectInfo> { + return ProjectInfo{TempDir.str().str()}; + }, + /*Mappings=*/{}); + + std::string TestFilePath = (TempDir + "/foo.cpp").str(); + BackgroundIndexStorage *Storage = Factory(TestFilePath); + ASSERT_NE(Storage, nullptr); + + // Store a minimal shard to create the file + SymbolSlab::Builder SB; + SymbolSlab Symbols = std::move(SB).build(); + IndexFileOut Shard; + Shard.Symbols = &Symbols; + ASSERT_FALSE(Storage->storeShard(TestFilePath, std::move(Shard))); + + // Shard filename hash must be based on TestFilePath, not a file:// URI + llvm::SmallString<256> ExpectedPath(TempDir); + llvm::sys::path::append(ExpectedPath, ".cache", "clangd", "index", + "foo.cpp." + llvm::toHex(digest(TestFilePath)) + + ".idx"); + EXPECT_TRUE(llvm::sys::fs::exists(ExpectedPath)) + << "Expected shard file not found: " << ExpectedPath; +} + } // namespace clangd } // namespace clang >From a6f3084bbf44ee9ac52e779b4f5c6b9e35558ee0 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:39:18 -0500 Subject: [PATCH 06/18] Apply background index path mapping on load When reading shards from disk, apply any background index path mappings prior to storing URIs in the string table. 
--- .../clangd/index/BackgroundIndexStorage.cpp | 5 +++-- .../clangd/index/Serialization.cpp | 21 ++++++++++++------- .../clangd/index/Serialization.h | 5 ++++- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index eb7ab6f37345f..4f7f8e09f4fac 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -101,8 +101,9 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { auto Buffer = llvm::MemoryBuffer::getFile(ShardPath); if (!Buffer) return nullptr; - if (auto I = - readIndexFile(Buffer->get()->getBuffer(), SymbolOrigin::Background)) + const URITransform *Transform = Mappings.empty() ? nullptr : &LoadTransform; + if (auto I = readIndexFile(Buffer->get()->getBuffer(), + SymbolOrigin::Background, Transform)) return std::make_unique<IndexFileIn>(std::move(*I)); else elog("Error while reading shard {0}: {1}", ShardIdentifier, diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index f03839599612c..f2e4e9ec3f14f 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -214,7 +214,8 @@ struct StringTableIn { std::vector<llvm::StringRef> Strings; }; -llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) { +llvm::Expected<StringTableIn> +readStringTable(llvm::StringRef Data, const URITransform *Transform = nullptr) { Reader R(Data); size_t UncompressedSize = R.consume32(); if (R.err()) @@ -249,7 +250,12 @@ llvm::Expected<StringTableIn> readStringTable(llvm::StringRef Data) { auto Len = R.rest().find(0); if (Len == llvm::StringRef::npos) return error("Bad string table: not null terminated"); - Table.Strings.push_back(Saver.save(R.consume(Len))); + llvm::StringRef S = R.consume(Len); + // Apply any provided path 
mapping transform to incoming file:// URIs + if (Transform && S.starts_with("file://")) + Table.Strings.push_back(Saver.save((*Transform)(S))); + else + Table.Strings.push_back(Saver.save(S)); R.consume8(); } if (R.err()) @@ -459,8 +465,8 @@ readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) { // data. Later we may want to support some backward compatibility. constexpr static uint32_t Version = 20; -llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, - SymbolOrigin Origin) { +llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, SymbolOrigin Origin, + const URITransform *Transform) { auto RIFF = riff::readFile(Data); if (!RIFF) return RIFF.takeError(); @@ -483,7 +489,7 @@ llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, if (!Chunks.count(RequiredChunk)) return error("missing required chunk {0}", RequiredChunk); - auto Strings = readStringTable(Chunks.lookup("stri")); + auto Strings = readStringTable(Chunks.lookup("stri"), Transform); if (!Strings) return Strings.takeError(); @@ -691,9 +697,10 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) { } llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data, - SymbolOrigin Origin) { + SymbolOrigin Origin, + const URITransform *Transform) { if (Data.starts_with("RIFF")) { - return readRIFF(Data, Origin); + return readRIFF(Data, Origin, Transform); } if (auto YAMLContents = readYAML(Data, Origin)) { return std::move(*YAMLContents); diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index 0942155d8e898..4d6fa81bea1b5 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -54,7 +54,10 @@ struct IndexFileIn { std::optional<tooling::CompileCommand> Cmd; }; // Parse an index file. The input must be a RIFF or YAML file. 
-llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef, SymbolOrigin); +// If Transform is provided, use it to remap all URIs. +llvm::Expected<IndexFileIn> +readIndexFile(llvm::StringRef, SymbolOrigin, + const URITransform *Transform = nullptr); // Specifies the contents of an index file to be written. struct IndexFileOut { >From 2de8816918dccf4969269914de2eb52a3dda6013 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:42:33 -0500 Subject: [PATCH 07/18] Apply background index path mapping on store Before writing background index shards out to disk, apply any path mapping transformations. Note: additional storage was required to hold the transformed URIs. There may be a better method to handle this, but I tried to mirror the existing mechanism used for loading as much as possible. --- .../clangd/index/BackgroundIndexStorage.cpp | 2 ++ clang-tools-extra/clangd/index/Serialization.cpp | 16 ++++++++++++++-- clang-tools-extra/clangd/index/Serialization.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 4f7f8e09f4fac..2f129d3e79017 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -115,6 +115,8 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { IndexFileOut Shard) const override { auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier, Mappings); + if (!Mappings.empty()) + Shard.Transform = &StoreTransform; return llvm::writeToOutput(ShardPath, [&Shard](llvm::raw_ostream &OS) { OS << Shard; return llvm::Error::success(); diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index f2e4e9ec3f14f..b053453e7ca63 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ 
b/clang-tools-extra/clangd/index/Serialization.cpp @@ -163,12 +163,16 @@ void writeVar(uint32_t I, llvm::raw_ostream &OS) { // These are sorted to improve compression. // Maps each string to a canonical representation. -// Strings remain owned externally (e.g. by SymbolSlab). +// Strings remain owned externally (e.g. by SymbolSlab), except for strings +// that are transformed by path remapping. class StringTableOut { llvm::DenseSet<llvm::StringRef> Unique; std::vector<llvm::StringRef> Sorted; // Since strings are interned, look up can be by pointer. llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index; + llvm::BumpPtrAllocator Arena; + llvm::StringSaver TransformSaver{Arena}; + const URITransform *Transform = nullptr; public: StringTableOut() { @@ -176,8 +180,14 @@ class StringTableOut { // Table size zero is reserved to indicate no compression. Unique.insert(""); } + void setTransform(const URITransform *T) { Transform = T; } // Add a string to the table. Overwrites S if an identical string exists. - void intern(llvm::StringRef &S) { S = *Unique.insert(S).first; }; + // If path remapping is enabled, transform and store the new value. + void intern(llvm::StringRef &S) { + if (Transform && S.starts_with("file://")) + S = TransformSaver.save((*Transform)(S)); + S = *Unique.insert(S).first; + } // Finalize the table and write it to OS. No more strings may be added. 
void finalize(llvm::raw_ostream &OS) { Sorted = {Unique.begin(), Unique.end()}; @@ -576,6 +586,8 @@ void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) { RIFF.Chunks.push_back({riff::fourCC("meta"), Meta}); StringTableOut Strings; + if (Data.Transform) + Strings.setTransform(Data.Transform); std::vector<Symbol> Symbols; for (const auto &Sym : *Data.Symbols) { Symbols.emplace_back(Sym); diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index 4d6fa81bea1b5..50f958f8de88f 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -69,6 +69,7 @@ struct IndexFileOut { // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; const tooling::CompileCommand *Cmd = nullptr; + const URITransform *Transform = nullptr; IndexFileOut() = default; IndexFileOut(const IndexFileIn &I) >From 7bf257d9a396e8422927b3c0744f60b9a7a18269 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:47:31 -0500 Subject: [PATCH 08/18] Add lit tests to validate background path mappings 1. Test that shard filenames are based on the mapped URI 2. Test that the on-disk contents contain mapped paths 3. Test that loading on-disk contents reverses the mapping --- .../test/background-index-path-mappings.test | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 clang-tools-extra/clangd/test/background-index-path-mappings.test diff --git a/clang-tools-extra/clangd/test/background-index-path-mappings.test b/clang-tools-extra/clangd/test/background-index-path-mappings.test new file mode 100644 index 0000000000000..5e89d2ebe9fbd --- /dev/null +++ b/clang-tools-extra/clangd/test/background-index-path-mappings.test @@ -0,0 +1,87 @@ +# Use a copy of inputs, as we'll mutate it (as will the background index). 
+# RUN: rm -rf %/t +# RUN: cp -r %/S/Inputs/background-index %/t +# Need to embed the correct temp path in the actual JSON-RPC requests. +# RUN: sed -e "s|DIRECTORY|%/t|" %/t/definition.jsonrpc.tmpl > %/t/definition.jsonrpc.1 +# RUN: sed -e "s|DIRECTORY|%/t|" %/t/compile_commands.json.tmpl > %/t/compile_commands.json +# On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..." +# (with the extra slash in the front), so we add it here +# RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t/definition.jsonrpc.1 > %/t/definition.jsonrpc + +# Create the background index files with path mappings +# RUN: clangd -background-index --background-index-path-mappings=%/t=/MAPPED_ROOT -lit-test < %/t/definition.jsonrpc | FileCheck %/t/definition.jsonrpc + +############################################################################### +# 1. Validate shard filenames use the mapped path (not actual path) for hashing +############################################################################### + +# The hash of "file:///MAPPED_ROOT/foo.cpp" is deterministic +# RUN: ls %/t/.cache/clangd/index/foo.cpp.*.idx | FileCheck --check-prefix=MAPPED-HASH %s +# MAPPED-HASH: foo.cpp.BE43CE222BC6EF16.idx + +############################################################################### +# 2. 
Validate shard on-disk contents contain mapped paths, not actual paths +############################################################################### + +# Copy the index file to a known location so we can pass it to dexp +# RUN: cp %/t/.cache/clangd/index/foo.cpp.*.idx %/t/foo.cpp.idx + +# Export the shard to YAML format to validate its contents +# RUN: dexp %/t/foo.cpp.idx -c "export %/t/foo.yaml -format=yaml" + +# RUN: FileCheck --check-prefix=SHARD-CONTENT %s < %/t/foo.yaml + +# Verify that the symbol 'foo' has URIs with /MAPPED_ROOT prefix +# SHARD-CONTENT: --- !Symbol +# SHARD-CONTENT: Name:{{.*}}foo +# SHARD-CONTENT: CanonicalDeclaration: +# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/sub_dir/foo.h +# SHARD-CONTENT: Definition: +# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/foo.cpp + +# Verify that IncludeHeaders also uses the mapped path +# SHARD-CONTENT: IncludeHeaders: +# SHARD-CONTENT: - Header:{{.*}}/MAPPED_ROOT/sub_dir/foo.h + +# Verify that Refs use the mapped path +# SHARD-CONTENT: --- !Refs +# SHARD-CONTENT: References: +# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/foo.cpp + +# Verify that Sources use the mapped path +# SHARD-CONTENT: --- !Source +# SHARD-CONTENT: URI:{{.*}}/MAPPED_ROOT/ + +# Verify that the Cmd section keeps original paths (not mapped), since compile +# commands are machine-specific +# SHARD-CONTENT: --- !Cmd +# SHARD-CONTENT: Directory: +# SHARD-CONTENT-NOT: MAPPED_ROOT + +############################################################################### +# 3. 
Validate loading shards reverses the path mapping to the local path +############################################################################### + +# Create "Client B" directory with a different path but same source content +# RUN: rm -rf %/t2 +# RUN: mkdir -p %/t2 +# RUN: cp -r %/S/Inputs/background-index/* %/t2/ + +# Copy "Client A" index data to "Client B" cache directory +# RUN: mkdir -p %/t2/.cache/clangd/index +# RUN: cp %/t/.cache/clangd/index/*.idx %/t2/.cache/clangd/index/ +# RUN: mkdir -p %/t2/sub_dir/.cache/clangd/index +# RUN: cp %/t/sub_dir/.cache/clangd/index/*.idx %/t2/sub_dir/.cache/clangd/index/ + +# Set up "Client B" compile_commands.json and request file +# RUN: sed -e "s|DIRECTORY|%/t2|" %/S/Inputs/background-index/compile_commands.json.tmpl > %/t2/compile_commands.json +# RUN: sed -e "s|DIRECTORY|%/t2|" %/S/Inputs/background-index/definition.jsonrpc.tmpl > %/t2/definition.jsonrpc.1 +# RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t2/definition.jsonrpc.1 > %/t2/definition.jsonrpc + +# clangd should load "Client A" shards, mapping data to "Client B" local paths. +# Verify both that go-to-definition works (in definition.jsonrpc) and that the +# returned URI points to a "Client B" path. +# RUN: clangd -background-index --background-index-path-mappings=%/t2=/MAPPED_ROOT -lit-test < %/t2/definition.jsonrpc > %/t2/clangd-output.json +# RUN: FileCheck %/t2/definition.jsonrpc < %/t2/clangd-output.json +# RUN: FileCheck --check-prefix=ROUNDTRIP %s -DDIR=%/t2 < %/t2/clangd-output.json +# ROUNDTRIP: "uri": "file://[[DIR]]/foo.cpp" >From 915454622295d3c19039dcded6eac957b2744516 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 6 Feb 2026 15:49:41 -0500 Subject: [PATCH 09/18] Add Serialization unit tests for path remapping The primary purpose is to ensure if additional fields are added to the on-disk representation and they aren't mapped the test will fail. 
--- .../clangd/unittests/SerializationTests.cpp | 105 ++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index d18ae478c1653..3ee7bcff958aa 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -444,6 +444,111 @@ TEST(SerializationTest, NoCrashOnBadStringTableSize) { testing::HasSubstr("bytes is implausible")); } +// Verify path remapping is applied to all URI fields during load/store +TEST(SerializationTest, URITransformRoundTrip) { + URITransform WriteTransform = [](llvm::StringRef URI) -> std::string { + std::string S = URI.str(); + size_t Pos = S.find("/original/"); + if (Pos != std::string::npos) + S.replace(Pos, strlen("/original/"), "/transformed/"); + return S; + }; + URITransform ReadTransform = [](llvm::StringRef URI) -> std::string { + std::string S = URI.str(); + size_t Pos = S.find("/transformed/"); + if (Pos != std::string::npos) + S.replace(Pos, strlen("/transformed/"), "/original/"); + return S; + }; + + // Build an index containing "/original/" + Symbol Sym; + Sym.ID = cantFail(SymbolID::fromStr("057557CEBF6E6B2D")); + Sym.Name = "TestFunc"; + Sym.Scope = "ns::"; + Sym.Definition.FileURI = "file:///original/def.cpp"; + Sym.CanonicalDeclaration.FileURI = "file:///original/decl.h"; + Sym.IncludeHeaders.push_back({/*IncludeHeader=*/"file:///original/header.h", + /*References=*/1, + /*SupportedDirectives=*/Symbol::Include}); + Sym.IncludeHeaders.push_back( + {/*IncludeHeader=*/"<system_header>", // Literal, should not be modified + /*References=*/1, + /*SupportedDirectives=*/Symbol::Include}); + + SymbolSlab::Builder SymbolBuilder; + SymbolBuilder.insert(Sym); + SymbolSlab Symbols = std::move(SymbolBuilder).build(); + + Ref R; + R.Location.FileURI = "file:///original/ref.cpp"; + R.Kind = RefKind::Reference; + RefSlab::Builder RefBuilder; + 
RefBuilder.insert(Sym.ID, R); + RefSlab Refs = std::move(RefBuilder).build(); + + IncludeGraph Sources; + IncludeGraphNode IGN; + IGN.URI = "file:///original/source.cpp"; + IGN.Flags = IncludeGraphNode::SourceFlag::IsTU; + IGN.Digest = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'}; + IGN.DirectIncludes = {"file:///original/inc1.h", "file:///original/inc2.h"}; + Sources[IGN.URI] = IGN; + + IndexFileOut Out; + Out.Symbols = &Symbols; + Out.Refs = &Refs; + Out.Sources = &Sources; + Out.Format = IndexFileFormat::RIFF; + Out.Transform = &WriteTransform; + std::string Serialized = llvm::to_string(Out); + + // Verify the serialized data only contains "/transformed/". And if new fields + // are added, ensure they aren't missed by path mapping transformation logic. + EXPECT_TRUE(Serialized.find("/transformed/") != std::string::npos) + << "Serialized data should contain transformed URIs"; + EXPECT_TRUE(Serialized.find("/original/") == std::string::npos) + << "Serialized data should NOT contain original URIs"; + + // Deserialize to restore "/original/" + auto In = readIndexFile(Serialized, SymbolOrigin::Background, &ReadTransform); + ASSERT_TRUE(bool(In)) << In.takeError(); + + ASSERT_TRUE(In->Symbols); + auto &ReadSym = *In->Symbols->find(Sym.ID); + EXPECT_EQ(llvm::StringRef(ReadSym.Definition.FileURI), + "file:///original/def.cpp") + << "Symbol.Definition.FileURI not transformed"; + EXPECT_EQ(llvm::StringRef(ReadSym.CanonicalDeclaration.FileURI), + "file:///original/decl.h") + << "Symbol.CanonicalDeclaration.FileURI not transformed"; + ASSERT_EQ(ReadSym.IncludeHeaders.size(), 2u); + EXPECT_EQ(ReadSym.IncludeHeaders[0].IncludeHeader, + "file:///original/header.h") + << "Symbol.IncludeHeaders[0].IncludeHeader not transformed"; + EXPECT_EQ(ReadSym.IncludeHeaders[1].IncludeHeader, "<system_header>") + << "Literal include header should not be modified"; + + ASSERT_TRUE(In->Refs); + ASSERT_EQ(In->Refs->numRefs(), 1u); + auto RefIt = In->Refs->begin(); + EXPECT_EQ(RefIt->first, 
Sym.ID); + ASSERT_EQ(RefIt->second.size(), 1u); + EXPECT_EQ(llvm::StringRef(RefIt->second[0].Location.FileURI), + "file:///original/ref.cpp") + << "Ref.Location.FileURI not transformed"; + + ASSERT_TRUE(In->Sources); + // After load, sources are keyed by the restored URI + auto SourceIt = In->Sources->find("file:///original/source.cpp"); + ASSERT_NE(SourceIt, In->Sources->end()) << "Source URI key not transformed"; + EXPECT_EQ(SourceIt->second.URI, "file:///original/source.cpp") + << "IncludeGraphNode.URI not transformed"; + EXPECT_THAT(SourceIt->second.DirectIncludes, + ElementsAre("file:///original/inc1.h", "file:///original/inc2.h")) + << "IncludeGraphNode.DirectIncludes not transformed"; +} + } // namespace } // namespace clangd } // namespace clang >From 76a54e4bd9de7b2586c6a00ed29266a6d3445592 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Sat, 7 Feb 2026 18:31:37 -0500 Subject: [PATCH 10/18] Fix serialization test failing with zlib present Updated the test to pass on systems with and without zlib installed. --- .../clangd/unittests/SerializationTests.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index 3ee7bcff958aa..499b2f4c6abb3 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -503,14 +503,17 @@ TEST(SerializationTest, URITransformRoundTrip) { Out.Transform = &WriteTransform; std::string Serialized = llvm::to_string(Out); - // Verify the serialized data only contains "/transformed/". And if new fields - // are added, ensure they aren't missed by path mapping transformation logic. 
- EXPECT_TRUE(Serialized.find("/transformed/") != std::string::npos) - << "Serialized data should contain transformed URIs"; - EXPECT_TRUE(Serialized.find("/original/") == std::string::npos) - << "Serialized data should NOT contain original URIs"; - - // Deserialize to restore "/original/" + // Verify path mapping was applied by deserializing without the load + // transform. We cannot search raw bytes as the string table may be + // compressed. + auto Raw = readIndexFile(Serialized, SymbolOrigin::Background); + ASSERT_TRUE(bool(Raw)) << Raw.takeError(); + ASSERT_TRUE(Raw->Symbols); + EXPECT_EQ(llvm::StringRef(Raw->Symbols->find(Sym.ID)->Definition.FileURI), + "file:///transformed/def.cpp") + << "Write transform should have rewritten URIs on disk"; + + // Deserialize with load transform to restore "/original/" auto In = readIndexFile(Serialized, SymbolOrigin::Background, &ReadTransform); ASSERT_TRUE(bool(In)) << In.takeError(); >From dabb5fb8bd295f9c648548312ab4e284a0e8fe80 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Sat, 7 Feb 2026 20:29:06 -0500 Subject: [PATCH 11/18] Normalize path separators in getAbsolutePath Attempt to fix a CI failure exclusive to Windows. Normalize the path separators using the same pattern as FileSystemScheme::getAbsolutePath. 
--- clang-tools-extra/clangd/index/Background.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 17a8097394492..1bc1584563eca 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -74,6 +74,7 @@ llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { llvm::sys::path::append(AbsolutePath, Cmd.Filename); llvm::sys::path::remove_dots(AbsolutePath, true); } + llvm::sys::path::native(AbsolutePath); return AbsolutePath; } >From 343b1dddf67dfcc45387d9be8c0741cef16bb42d Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Mon, 9 Feb 2026 10:40:18 -0500 Subject: [PATCH 12/18] Handle potential for additional slash on Windows Attempt to fix another CI failure exclusive to Windows. I believe it is due to the potential extra slash in the Windows path, but I'm relying on CI as I do not have an available Windows system to validate locally. 
--- .../clangd/test/background-index-path-mappings.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/test/background-index-path-mappings.test b/clang-tools-extra/clangd/test/background-index-path-mappings.test index 5e89d2ebe9fbd..fb9482dc6238b 100644 --- a/clang-tools-extra/clangd/test/background-index-path-mappings.test +++ b/clang-tools-extra/clangd/test/background-index-path-mappings.test @@ -84,4 +84,5 @@ # RUN: clangd -background-index --background-index-path-mappings=%/t2=/MAPPED_ROOT -lit-test < %/t2/definition.jsonrpc > %/t2/clangd-output.json # RUN: FileCheck %/t2/definition.jsonrpc < %/t2/clangd-output.json # RUN: FileCheck --check-prefix=ROUNDTRIP %s -DDIR=%/t2 < %/t2/clangd-output.json -# ROUNDTRIP: "uri": "file://[[DIR]]/foo.cpp" +# Handle the extra slash needed on Windows +# ROUNDTRIP: "uri": "file://{{/?}}[[DIR]]/foo.cpp" >From e30c8d4ebf2dade1fa2a313af03347c6a3f31d42 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 13 Feb 2026 10:33:00 -0500 Subject: [PATCH 13/18] Use true path remapping over intermediate token Alter the background index path mapping approach to more strictly be about path mapping instead of introducing an intermediate, unusable index state. 
- Add API to map raw file paths instead of URIs - Update tests to validate the functionality and better represent the path mapping concept --- clang-tools-extra/clangd/PathMapping.cpp | 17 +++ clang-tools-extra/clangd/PathMapping.h | 6 + .../clangd/index/BackgroundIndexStorage.cpp | 12 +- .../test/background-index-path-mappings.test | 74 +++++-------- clang-tools-extra/clangd/tool/ClangdMain.cpp | 6 +- .../clangd/unittests/SerializationTests.cpp | 104 +++++++++++------- 6 files changed, 125 insertions(+), 94 deletions(-) diff --git a/clang-tools-extra/clangd/PathMapping.cpp b/clang-tools-extra/clangd/PathMapping.cpp index 4b93ff2c60c5c..964ef6482608a 100644 --- a/clang-tools-extra/clangd/PathMapping.cpp +++ b/clang-tools-extra/clangd/PathMapping.cpp @@ -45,6 +45,23 @@ std::optional<std::string> doPathMapping(llvm::StringRef S, return std::nullopt; } +std::optional<std::string> doFilePathMapping(llvm::StringRef FilePath, + PathMapping::Direction Dir, + const PathMappings &Mappings) { + for (const auto &Mapping : Mappings) { + const std::string &From = Dir == PathMapping::Direction::ClientToServer + ? Mapping.ClientPath + : Mapping.ServerPath; + const std::string &To = Dir == PathMapping::Direction::ClientToServer + ? 
Mapping.ServerPath + : Mapping.ClientPath; + if (FilePath.consume_front(From) && + (FilePath.empty() || FilePath.front() == '/')) + return (To + FilePath).str(); + } + return std::nullopt; +} + void applyPathMappings(llvm::json::Value &V, PathMapping::Direction Dir, const PathMappings &Mappings) { using Kind = llvm::json::Value::Kind; diff --git a/clang-tools-extra/clangd/PathMapping.h b/clang-tools-extra/clangd/PathMapping.h index 1893753392fc3..9c8c5a8f34120 100644 --- a/clang-tools-extra/clangd/PathMapping.h +++ b/clang-tools-extra/clangd/PathMapping.h @@ -54,6 +54,12 @@ std::optional<std::string> doPathMapping(llvm::StringRef S, PathMapping::Direction Dir, const PathMappings &Mappings); +/// Like doPathMapping, but operates directly on a file path rather than a +/// file:// URI. Returns std::nullopt if no mapping matches. +std::optional<std::string> doFilePathMapping(llvm::StringRef FilePath, + PathMapping::Direction Dir, + const PathMappings &Mappings); + /// Applies the \p Mappings to all the file:// URIs in \p Params. /// NOTE: The first matching mapping will be applied, otherwise \p Params will /// be untouched. 
diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 2f129d3e79017..5945a37bd59b8 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -39,15 +39,11 @@ std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, llvm::StringRef FilePath, const PathMappings &Mappings) { std::string HashInput; - if (Mappings.empty()) { + if (auto Remapped = doFilePathMapping( + FilePath, PathMapping::Direction::ClientToServer, Mappings)) + HashInput = std::move(*Remapped); + else HashInput = FilePath.str(); - } else { - // Hash the mapped URI so that shards are consistently named regardless of - // the path of the generating client - std::string FileURI = URI::createFile(FilePath).toString(); - HashInput = applyPathMappingToURI( - FileURI, PathMapping::Direction::ClientToServer, Mappings); - } llvm::SmallString<128> ShardRootSS(ShardRoot); llvm::sys::path::append(ShardRootSS, llvm::sys::path::filename(FilePath) + "." + diff --git a/clang-tools-extra/clangd/test/background-index-path-mappings.test b/clang-tools-extra/clangd/test/background-index-path-mappings.test index fb9482dc6238b..25531245393f0 100644 --- a/clang-tools-extra/clangd/test/background-index-path-mappings.test +++ b/clang-tools-extra/clangd/test/background-index-path-mappings.test @@ -1,88 +1,74 @@ -# Use a copy of inputs, as we'll mutate it (as will the background index). +# Generate a canonical index at %/t. A second client at %/t2 loads the shards, +# mapping its local paths to the canonical paths. The on-disk shards always +# contain canonical paths. + # RUN: rm -rf %/t # RUN: cp -r %/S/Inputs/background-index %/t -# Need to embed the correct temp path in the actual JSON-RPC requests. 
# RUN: sed -e "s|DIRECTORY|%/t|" %/t/definition.jsonrpc.tmpl > %/t/definition.jsonrpc.1 # RUN: sed -e "s|DIRECTORY|%/t|" %/t/compile_commands.json.tmpl > %/t/compile_commands.json # On Windows, we need the URI in didOpen to look like "uri":"file:///C:/..." -# (with the extra slash in the front), so we add it here # RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t/definition.jsonrpc.1 > %/t/definition.jsonrpc -# Create the background index files with path mappings -# RUN: clangd -background-index --background-index-path-mappings=%/t=/MAPPED_ROOT -lit-test < %/t/definition.jsonrpc | FileCheck %/t/definition.jsonrpc - -############################################################################### -# 1. Validate shard filenames use the mapped path (not actual path) for hashing -############################################################################### - -# The hash of "file:///MAPPED_ROOT/foo.cpp" is deterministic -# RUN: ls %/t/.cache/clangd/index/foo.cpp.*.idx | FileCheck --check-prefix=MAPPED-HASH %s -# MAPPED-HASH: foo.cpp.BE43CE222BC6EF16.idx +# Generate the canonical background index with no path mapping +# RUN: clangd -background-index -lit-test < %/t/definition.jsonrpc | FileCheck %/t/definition.jsonrpc ############################################################################### -# 2. Validate shard on-disk contents contain mapped paths, not actual paths +# 1. 
Validate shard contents contain canonical paths ############################################################################### -# Copy the index file to a known location so we can pass it to dexp # RUN: cp %/t/.cache/clangd/index/foo.cpp.*.idx %/t/foo.cpp.idx - -# Export the shard to YAML format to validate its contents # RUN: dexp %/t/foo.cpp.idx -c "export %/t/foo.yaml -format=yaml" +# RUN: FileCheck --check-prefix=SHARD-CONTENT %s -DDIR=%/t < %/t/foo.yaml -# RUN: FileCheck --check-prefix=SHARD-CONTENT %s < %/t/foo.yaml - -# Verify that the symbol 'foo' has URIs with /MAPPED_ROOT prefix # SHARD-CONTENT: --- !Symbol # SHARD-CONTENT: Name:{{.*}}foo # SHARD-CONTENT: CanonicalDeclaration: -# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/sub_dir/foo.h +# SHARD-CONTENT: FileURI:{{.*}}[[DIR]]/sub_dir/foo.h # SHARD-CONTENT: Definition: -# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/foo.cpp +# SHARD-CONTENT: FileURI:{{.*}}[[DIR]]/foo.cpp -# Verify that IncludeHeaders also uses the mapped path # SHARD-CONTENT: IncludeHeaders: -# SHARD-CONTENT: - Header:{{.*}}/MAPPED_ROOT/sub_dir/foo.h +# SHARD-CONTENT: - Header:{{.*}}[[DIR]]/sub_dir/foo.h -# Verify that Refs use the mapped path # SHARD-CONTENT: --- !Refs # SHARD-CONTENT: References: -# SHARD-CONTENT: FileURI:{{.*}}/MAPPED_ROOT/foo.cpp +# SHARD-CONTENT: FileURI:{{.*}}[[DIR]]/foo.cpp -# Verify that Sources use the mapped path # SHARD-CONTENT: --- !Source -# SHARD-CONTENT: URI:{{.*}}/MAPPED_ROOT/ - -# Verify that the Cmd section keeps original paths (not mapped), since compile -# commands are machine-specific -# SHARD-CONTENT: --- !Cmd -# SHARD-CONTENT: Directory: -# SHARD-CONTENT-NOT: MAPPED_ROOT +# SHARD-CONTENT: URI:{{.*}}[[DIR]]/ ############################################################################### -# 3. Validate loading shards reverses the path mapping to the local path +# 2. 
Validate the second client loads and remaps the shards ############################################################################### -# Create "Client B" directory with a different path but same source content +# Set up the second client's workspace with the same source content. # RUN: rm -rf %/t2 # RUN: mkdir -p %/t2 # RUN: cp -r %/S/Inputs/background-index/* %/t2/ -# Copy "Client A" index data to "Client B" cache directory +# Copy the canonical index data to the second client's cache directory. # RUN: mkdir -p %/t2/.cache/clangd/index # RUN: cp %/t/.cache/clangd/index/*.idx %/t2/.cache/clangd/index/ # RUN: mkdir -p %/t2/sub_dir/.cache/clangd/index # RUN: cp %/t/sub_dir/.cache/clangd/index/*.idx %/t2/sub_dir/.cache/clangd/index/ -# Set up "Client B" compile_commands.json and request file +# Set up the second client's compile_commands.json and request file. # RUN: sed -e "s|DIRECTORY|%/t2|" %/S/Inputs/background-index/compile_commands.json.tmpl > %/t2/compile_commands.json # RUN: sed -e "s|DIRECTORY|%/t2|" %/S/Inputs/background-index/definition.jsonrpc.tmpl > %/t2/definition.jsonrpc.1 # RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t2/definition.jsonrpc.1 > %/t2/definition.jsonrpc -# clangd should load "Client A" shards, mapping data to "Client B" local paths. -# Verify both that go-to-definition works (in definition.jsonrpc) and that the -# returned URI points to a "Client B" path. -# RUN: clangd -background-index --background-index-path-mappings=%/t2=/MAPPED_ROOT -lit-test < %/t2/definition.jsonrpc > %/t2/clangd-output.json +# The mapping %/t2=%/t tells clangd that the local path %/t2 corresponds to +# the canonical path %/t stored in the shards. clangd should load the shards +# and translate canonical paths to local paths in memory. Go-to-definition +# should work and return the local path. 
+# RUN: clangd -background-index --background-index-path-mappings=%/t2=%/t -lit-test < %/t2/definition.jsonrpc > %/t2/clangd-output.json # RUN: FileCheck %/t2/definition.jsonrpc < %/t2/clangd-output.json -# RUN: FileCheck --check-prefix=ROUNDTRIP %s -DDIR=%/t2 < %/t2/clangd-output.json +# RUN: FileCheck --check-prefix=LOCAL-RESULT %s -DDIR=%/t2 < %/t2/clangd-output.json # Handle the extra slash needed on Windows -# ROUNDTRIP: "uri": "file://{{/?}}[[DIR]]/foo.cpp" +# LOCAL-RESULT: "uri": "file://{{/?}}[[DIR]]/foo.cpp" + +# Verify the shard filenames are unchanged, proving that the canonical path +# mapping was used for hashing rather than re-indexing with local paths. +# RUN: ls %/t/.cache/clangd/index/ | sort > %/t/shards-canonical.txt +# RUN: ls %/t2/.cache/clangd/index/ | sort > %/t2/shards-local.txt +# RUN: diff %/t/shards-canonical.txt %/t2/shards-local.txt diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index f702db03907a1..71a45ad3da2d2 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -438,10 +438,10 @@ opt<bool> EnableTestScheme{ opt<std::string> BackgroundIndexPathMappings{ "background-index-path-mappings", cat(Protocol), - desc("Translate clients paths prior to writing background index files to " - "disk. Enables sharing of background index files between clients. " + desc("Translate client paths when reading and writing background index " + "files. Enables sharing of background index files between clients. " "Format is identical to --path-mappings. " - "e.g. /local/workspace=/TOKEN/workspace"), + "e.g. 
/home/project=/workarea/project"), init(""), }; diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index 499b2f4c6abb3..099a3fbbb7b80 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -444,33 +444,41 @@ TEST(SerializationTest, NoCrashOnBadStringTableSize) { testing::HasSubstr("bytes is implausible")); } -// Verify path remapping is applied to all URI fields during load/store +// Verify path remapping is applied to all URI fields during load/store. +// An index is generated at /home/project. A second client at /workarea/project +// loads and re-stores the shards. On-disk content always contains the +// /home/project paths so the index remains portable. TEST(SerializationTest, URITransformRoundTrip) { - URITransform WriteTransform = [](llvm::StringRef URI) -> std::string { + // Store transform: map /workarea/project -> /home/project so that + // on-disk content stays in the canonical /home/project paths. + URITransform StoreTransform = [](llvm::StringRef URI) -> std::string { std::string S = URI.str(); - size_t Pos = S.find("/original/"); + size_t Pos = S.find("/workarea/project/"); if (Pos != std::string::npos) - S.replace(Pos, strlen("/original/"), "/transformed/"); + S.replace(Pos, strlen("/workarea/project/"), "/home/project/"); return S; }; - URITransform ReadTransform = [](llvm::StringRef URI) -> std::string { + // Load transform: map /home/project -> /workarea/project so that + // in-memory paths match the local filesystem. 
+ URITransform LoadTransform = [](llvm::StringRef URI) -> std::string { std::string S = URI.str(); - size_t Pos = S.find("/transformed/"); + size_t Pos = S.find("/home/project/"); if (Pos != std::string::npos) - S.replace(Pos, strlen("/transformed/"), "/original/"); + S.replace(Pos, strlen("/home/project/"), "/workarea/project/"); return S; }; - // Build an index containing "/original/" + // The index is generated with /home/project paths. Symbol Sym; Sym.ID = cantFail(SymbolID::fromStr("057557CEBF6E6B2D")); Sym.Name = "TestFunc"; Sym.Scope = "ns::"; - Sym.Definition.FileURI = "file:///original/def.cpp"; - Sym.CanonicalDeclaration.FileURI = "file:///original/decl.h"; - Sym.IncludeHeaders.push_back({/*IncludeHeader=*/"file:///original/header.h", - /*References=*/1, - /*SupportedDirectives=*/Symbol::Include}); + Sym.Definition.FileURI = "file:///home/project/def.cpp"; + Sym.CanonicalDeclaration.FileURI = "file:///home/project/decl.h"; + Sym.IncludeHeaders.push_back( + {/*IncludeHeader=*/"file:///home/project/header.h", + /*References=*/1, + /*SupportedDirectives=*/Symbol::Include}); Sym.IncludeHeaders.push_back( {/*IncludeHeader=*/"<system_header>", // Literal, should not be modified /*References=*/1, @@ -481,7 +489,7 @@ TEST(SerializationTest, URITransformRoundTrip) { SymbolSlab Symbols = std::move(SymbolBuilder).build(); Ref R; - R.Location.FileURI = "file:///original/ref.cpp"; + R.Location.FileURI = "file:///home/project/ref.cpp"; R.Kind = RefKind::Reference; RefSlab::Builder RefBuilder; RefBuilder.insert(Sym.ID, R); @@ -489,46 +497,46 @@ TEST(SerializationTest, URITransformRoundTrip) { IncludeGraph Sources; IncludeGraphNode IGN; - IGN.URI = "file:///original/source.cpp"; + IGN.URI = "file:///home/project/source.cpp"; IGN.Flags = IncludeGraphNode::SourceFlag::IsTU; IGN.Digest = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'}; - IGN.DirectIncludes = {"file:///original/inc1.h", "file:///original/inc2.h"}; + IGN.DirectIncludes = {"file:///home/project/inc1.h", + 
"file:///home/project/inc2.h"}; Sources[IGN.URI] = IGN; + // Serialize the index directly (no transform) to produce the canonical + // on-disk format containing /home/project paths. IndexFileOut Out; Out.Symbols = &Symbols; Out.Refs = &Refs; Out.Sources = &Sources; Out.Format = IndexFileFormat::RIFF; - Out.Transform = &WriteTransform; std::string Serialized = llvm::to_string(Out); - // Verify path mapping was applied by deserializing without the load - // transform. We cannot search raw bytes as the string table may be - // compressed. + // Verify the on-disk shard contains /home/project paths. auto Raw = readIndexFile(Serialized, SymbolOrigin::Background); ASSERT_TRUE(bool(Raw)) << Raw.takeError(); ASSERT_TRUE(Raw->Symbols); EXPECT_EQ(llvm::StringRef(Raw->Symbols->find(Sym.ID)->Definition.FileURI), - "file:///transformed/def.cpp") - << "Write transform should have rewritten URIs on disk"; + "file:///home/project/def.cpp") + << "On-disk shard should contain /home/project paths"; - // Deserialize with load transform to restore "/original/" - auto In = readIndexFile(Serialized, SymbolOrigin::Background, &ReadTransform); + // Load with the transform to map /home/project -> /workarea/project. 
+ auto In = readIndexFile(Serialized, SymbolOrigin::Background, &LoadTransform); ASSERT_TRUE(bool(In)) << In.takeError(); ASSERT_TRUE(In->Symbols); auto &ReadSym = *In->Symbols->find(Sym.ID); EXPECT_EQ(llvm::StringRef(ReadSym.Definition.FileURI), - "file:///original/def.cpp") - << "Symbol.Definition.FileURI not transformed"; + "file:///workarea/project/def.cpp") + << "Symbol.Definition.FileURI not mapped"; EXPECT_EQ(llvm::StringRef(ReadSym.CanonicalDeclaration.FileURI), - "file:///original/decl.h") - << "Symbol.CanonicalDeclaration.FileURI not transformed"; + "file:///workarea/project/decl.h") + << "Symbol.CanonicalDeclaration.FileURI not mapped"; ASSERT_EQ(ReadSym.IncludeHeaders.size(), 2u); EXPECT_EQ(ReadSym.IncludeHeaders[0].IncludeHeader, - "file:///original/header.h") - << "Symbol.IncludeHeaders[0].IncludeHeader not transformed"; + "file:///workarea/project/header.h") + << "Symbol.IncludeHeaders[0].IncludeHeader not mapped"; EXPECT_EQ(ReadSym.IncludeHeaders[1].IncludeHeader, "<system_header>") << "Literal include header should not be modified"; @@ -538,18 +546,36 @@ TEST(SerializationTest, URITransformRoundTrip) { EXPECT_EQ(RefIt->first, Sym.ID); ASSERT_EQ(RefIt->second.size(), 1u); EXPECT_EQ(llvm::StringRef(RefIt->second[0].Location.FileURI), - "file:///original/ref.cpp") - << "Ref.Location.FileURI not transformed"; + "file:///workarea/project/ref.cpp") + << "Ref.Location.FileURI not mapped"; ASSERT_TRUE(In->Sources); - // After load, sources are keyed by the restored URI - auto SourceIt = In->Sources->find("file:///original/source.cpp"); - ASSERT_NE(SourceIt, In->Sources->end()) << "Source URI key not transformed"; - EXPECT_EQ(SourceIt->second.URI, "file:///original/source.cpp") - << "IncludeGraphNode.URI not transformed"; + auto SourceIt = In->Sources->find("file:///workarea/project/source.cpp"); + ASSERT_NE(SourceIt, In->Sources->end()) << "Source URI key not mapped"; + EXPECT_EQ(SourceIt->second.URI, "file:///workarea/project/source.cpp") + << 
"IncludeGraphNode.URI not mapped"; EXPECT_THAT(SourceIt->second.DirectIncludes, - ElementsAre("file:///original/inc1.h", "file:///original/inc2.h")) - << "IncludeGraphNode.DirectIncludes not transformed"; + ElementsAre("file:///workarea/project/inc1.h", + "file:///workarea/project/inc2.h")) + << "IncludeGraphNode.DirectIncludes not mapped"; + + // Re-serialize with the store transform. On-disk content should be + // back in /home/project paths so the index remains portable. + IndexFileOut WorkareaOut; + WorkareaOut.Symbols = &*In->Symbols; + WorkareaOut.Refs = &*In->Refs; + WorkareaOut.Sources = &*In->Sources; + WorkareaOut.Format = IndexFileFormat::RIFF; + WorkareaOut.Transform = &StoreTransform; + std::string WorkareaSerialized = llvm::to_string(WorkareaOut); + + auto Restored = readIndexFile(WorkareaSerialized, SymbolOrigin::Background); + ASSERT_TRUE(bool(Restored)) << Restored.takeError(); + ASSERT_TRUE(Restored->Symbols); + EXPECT_EQ( + llvm::StringRef(Restored->Symbols->find(Sym.ID)->Definition.FileURI), + "file:///home/project/def.cpp") + << "Store transform should restore /home/project paths on disk"; } } // namespace >From cc9aef63180cc33b663c22d554770ca817e04106 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 13 Feb 2026 10:34:29 -0500 Subject: [PATCH 14/18] Add test validating multiple path mappings --- .../test/background-index-path-mappings.test | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/clang-tools-extra/clangd/test/background-index-path-mappings.test b/clang-tools-extra/clangd/test/background-index-path-mappings.test index 25531245393f0..59abea42763c9 100644 --- a/clang-tools-extra/clangd/test/background-index-path-mappings.test +++ b/clang-tools-extra/clangd/test/background-index-path-mappings.test @@ -72,3 +72,31 @@ # RUN: ls %/t/.cache/clangd/index/ | sort > %/t/shards-canonical.txt # RUN: ls %/t2/.cache/clangd/index/ | sort > %/t2/shards-local.txt # RUN: diff %/t/shards-canonical.txt 
%/t2/shards-local.txt + +############################################################################### +# 3. Validate multiple comma-separated path mappings +############################################################################### + +# Set up a third client at %/t3 with the same sources and copied shards. +# RUN: rm -rf %/t3 +# RUN: mkdir -p %/t3 +# RUN: cp -r %/S/Inputs/background-index/* %/t3/ +# RUN: mkdir -p %/t3/.cache/clangd/index +# RUN: cp %/t/.cache/clangd/index/*.idx %/t3/.cache/clangd/index/ +# RUN: mkdir -p %/t3/sub_dir/.cache/clangd/index +# RUN: cp %/t/sub_dir/.cache/clangd/index/*.idx %/t3/sub_dir/.cache/clangd/index/ +# RUN: sed -e "s|DIRECTORY|%/t3|" %/S/Inputs/background-index/compile_commands.json.tmpl > %/t3/compile_commands.json +# RUN: sed -e "s|DIRECTORY|%/t3|" %/S/Inputs/background-index/definition.jsonrpc.tmpl > %/t3/definition.jsonrpc.1 +# RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %/t3/definition.jsonrpc.1 > %/t3/definition.jsonrpc + +# The first specified mapping is intentionally non-matching; the second is the +# real mapping. This validates that multiple mappings can be specified and the +# correct one is applied. 
+# RUN: clangd -background-index --background-index-path-mappings=/no/match=/also/no/match,%/t3=%/t -lit-test < %/t3/definition.jsonrpc > %/t3/clangd-output.json +# RUN: FileCheck %/t3/definition.jsonrpc < %/t3/clangd-output.json +# RUN: FileCheck --check-prefix=MULTI-LOCAL %s -DDIR=%/t3 < %/t3/clangd-output.json +# MULTI-LOCAL: "uri": "file://{{/?}}[[DIR]]/foo.cpp" + +# Verify shard filenames are unchanged with multiple mappings +# RUN: ls %/t3/.cache/clangd/index/ | sort > %/t3/shards-multi.txt +# RUN: diff %/t/shards-canonical.txt %/t3/shards-multi.txt >From 801df5135e0a87f2ab8a1fc0f363548d67a218dd Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 13 Feb 2026 10:36:03 -0500 Subject: [PATCH 15/18] Map paths for compile commands data within shards - Generalize path mapping to work with URIs and raw paths - Handle specific cases where compilation flags might require more specific substitution than prefix matching - Add unit testing to validate compile commands are mapped when serialized --- .../clangd/index/BackgroundIndexStorage.cpp | 43 +++++++++++++------ .../clangd/index/Serialization.cpp | 10 +++-- .../clangd/unittests/SerializationTests.cpp | 36 ++++++++++++++++ 3 files changed, 73 insertions(+), 16 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 5945a37bd59b8..3e1b39dc161ee 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -26,13 +26,32 @@ namespace clang { namespace clangd { namespace { -// Apply path mapping to file URI. Return original URI if no mapping applies. -std::string applyPathMappingToURI(llvm::StringRef URI, - PathMapping::Direction Direction, - const PathMappings &Mappings) { - if (auto Mapped = doPathMapping(URI, Direction, Mappings)) +// Apply path mapping to file:// URIs or raw file paths. 
Return unmodified +// path if no mapping was applied. +std::string applyPathMapping(llvm::StringRef S, + PathMapping::Direction Direction, + const PathMappings &Mappings) { + // First, attempt URI mapping + if (auto Mapped = doPathMapping(S, Direction, Mappings)) return std::move(*Mapped); - return URI.str(); + // If that didn't match, attempt file path mapping. Paths processed here may + // be standalone or after a flag: -I, -isystem, and so on. + for (const auto &Mapping : Mappings) { + const std::string &From = + Direction == PathMapping::Direction::ClientToServer + ? Mapping.ClientPath + : Mapping.ServerPath; + const std::string &To = Direction == PathMapping::Direction::ClientToServer + ? Mapping.ServerPath + : Mapping.ClientPath; + size_t Pos = S.find(From); + if (Pos != llvm::StringRef::npos) { + llvm::StringRef After = S.substr(Pos + From.size()); + if (After.empty() || After.front() == '/') + return (S.substr(0, Pos) + To + After).str(); + } + } + return S.str(); } std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, @@ -64,14 +83,14 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { : DiskShardRoot(Directory), Mappings(std::move(Mappings)) { // Background path mappings are specified as /local/path=/canonical/path. // During load we transform from canonical to local (ServerToClient). - LoadTransform = [this](llvm::StringRef URI) { - return applyPathMappingToURI(URI, PathMapping::Direction::ServerToClient, - this->Mappings); + LoadTransform = [this](llvm::StringRef S) { + return applyPathMapping(S, PathMapping::Direction::ServerToClient, + this->Mappings); }; // During store we transform from local to canonical (ClientToServer). 
- StoreTransform = [this](llvm::StringRef URI) { - return applyPathMappingToURI(URI, PathMapping::Direction::ClientToServer, - this->Mappings); + StoreTransform = [this](llvm::StringRef S) { + return applyPathMapping(S, PathMapping::Direction::ClientToServer, + this->Mappings); }; std::error_code OK; std::error_code EC = llvm::sys::fs::create_directories(DiskShardRoot); diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index b053453e7ca63..1ea023125acf8 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -184,8 +184,11 @@ class StringTableOut { // Add a string to the table. Overwrites S if an identical string exists. // If path remapping is enabled, transform and store the new value. void intern(llvm::StringRef &S) { - if (Transform && S.starts_with("file://")) - S = TransformSaver.save((*Transform)(S)); + if (Transform) { + std::string Transformed = (*Transform)(S); + if (Transformed != S) + S = TransformSaver.save(std::move(Transformed)); + } S = *Unique.insert(S).first; } // Finalize the table and write it to OS. No more strings may be added. 
@@ -261,8 +264,7 @@ readStringTable(llvm::StringRef Data, const URITransform *Transform = nullptr) { if (Len == llvm::StringRef::npos) return error("Bad string table: not null terminated"); llvm::StringRef S = R.consume(Len); - // Apply any provided path mapping transform to incoming file:// URIs - if (Transform && S.starts_with("file://")) + if (Transform) Table.Strings.push_back(Saver.save((*Transform)(S))); else Table.Strings.push_back(Saver.save(S)); diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index 099a3fbbb7b80..cd00b535e5792 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -504,12 +504,24 @@ TEST(SerializationTest, URITransformRoundTrip) { "file:///home/project/inc2.h"}; Sources[IGN.URI] = IGN; + tooling::CompileCommand Cmd; + Cmd.Directory = "/home/project/src"; + Cmd.CommandLine = {"/usr/bin/clang++", + "-I/home/project/include", + "-isystem/home/project/sysinclude", + "-isystem", + "/home/project/sysinclude2", + "-DFOO=bar", + "-DROOT=/home/project/src", + "/home/project/src/test.cpp"}; + // Serialize the index directly (no transform) to produce the canonical // on-disk format containing /home/project paths. IndexFileOut Out; Out.Symbols = &Symbols; Out.Refs = &Refs; Out.Sources = &Sources; + Out.Cmd = &Cmd; Out.Format = IndexFileFormat::RIFF; std::string Serialized = llvm::to_string(Out); @@ -520,6 +532,8 @@ TEST(SerializationTest, URITransformRoundTrip) { EXPECT_EQ(llvm::StringRef(Raw->Symbols->find(Sym.ID)->Definition.FileURI), "file:///home/project/def.cpp") << "On-disk shard should contain /home/project paths"; + ASSERT_TRUE(Raw->Cmd); + EXPECT_EQ(Raw->Cmd->Directory, "/home/project/src"); // Load with the transform to map /home/project -> /workarea/project. 
auto In = readIndexFile(Serialized, SymbolOrigin::Background, &LoadTransform); @@ -559,12 +573,24 @@ TEST(SerializationTest, URITransformRoundTrip) { "file:///workarea/project/inc2.h")) << "IncludeGraphNode.DirectIncludes not mapped"; + ASSERT_TRUE(In->Cmd); + EXPECT_EQ(In->Cmd->Directory, "/workarea/project/src") + << "Cmd.Directory not mapped"; + EXPECT_THAT(In->Cmd->CommandLine, + ElementsAre("/usr/bin/clang++", "-I/workarea/project/include", + "-isystem/workarea/project/sysinclude", "-isystem", + "/workarea/project/sysinclude2", "-DFOO=bar", + "-DROOT=/workarea/project/src", + "/workarea/project/src/test.cpp")) + << "Cmd.CommandLine not mapped"; + // Re-serialize with the store transform. On-disk content should be // back in /home/project paths so the index remains portable. IndexFileOut WorkareaOut; WorkareaOut.Symbols = &*In->Symbols; WorkareaOut.Refs = &*In->Refs; WorkareaOut.Sources = &*In->Sources; + WorkareaOut.Cmd = In->Cmd ? &*In->Cmd : nullptr; WorkareaOut.Format = IndexFileFormat::RIFF; WorkareaOut.Transform = &StoreTransform; std::string WorkareaSerialized = llvm::to_string(WorkareaOut); @@ -576,6 +602,16 @@ TEST(SerializationTest, URITransformRoundTrip) { llvm::StringRef(Restored->Symbols->find(Sym.ID)->Definition.FileURI), "file:///home/project/def.cpp") << "Store transform should restore /home/project paths on disk"; + ASSERT_TRUE(Restored->Cmd); + EXPECT_EQ(Restored->Cmd->Directory, "/home/project/src") + << "Store transform should restore Cmd.Directory on disk"; + EXPECT_THAT(Restored->Cmd->CommandLine, + ElementsAre("/usr/bin/clang++", "-I/home/project/include", + "-isystem/home/project/sysinclude", "-isystem", + "/home/project/sysinclude2", "-DFOO=bar", + "-DROOT=/home/project/src", + "/home/project/src/test.cpp")) + << "Store transform should restore Cmd.CommandLine on disk"; } } // namespace >From 28124f0c2eb09c294e8a7d7a811536ec809ee1c1 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 13 Feb 2026 10:37:49 -0500 
Subject: [PATCH 16/18] Rename URITransform to PathTransform --- .../clangd/index/BackgroundIndexStorage.cpp | 8 ++++---- clang-tools-extra/clangd/index/Serialization.cpp | 11 ++++++----- clang-tools-extra/clangd/index/Serialization.h | 8 ++++---- .../clangd/unittests/SerializationTests.cpp | 6 +++--- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp index 3e1b39dc161ee..6207d9ef4dfa2 100644 --- a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp +++ b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp @@ -8,7 +8,6 @@ #include "GlobalCompilationDatabase.h" #include "PathMapping.h" -#include "URI.h" #include "index/Background.h" #include "support/Logger.h" #include "support/Path.h" @@ -74,8 +73,8 @@ std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, class DiskBackedIndexStorage : public BackgroundIndexStorage { std::string DiskShardRoot; PathMappings Mappings; - URITransform LoadTransform; - URITransform StoreTransform; + PathTransform LoadTransform; + PathTransform StoreTransform; public: // Creates `DiskShardRoot` and any parents during construction. @@ -116,7 +115,8 @@ class DiskBackedIndexStorage : public BackgroundIndexStorage { auto Buffer = llvm::MemoryBuffer::getFile(ShardPath); if (!Buffer) return nullptr; - const URITransform *Transform = Mappings.empty() ? nullptr : &LoadTransform; + const PathTransform *Transform = + Mappings.empty() ? 
nullptr : &LoadTransform; if (auto I = readIndexFile(Buffer->get()->getBuffer(), SymbolOrigin::Background, Transform)) return std::make_unique<IndexFileIn>(std::move(*I)); diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index 1ea023125acf8..22f4f2a90123e 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -172,7 +172,7 @@ class StringTableOut { llvm::DenseMap<std::pair<const char *, size_t>, unsigned> Index; llvm::BumpPtrAllocator Arena; llvm::StringSaver TransformSaver{Arena}; - const URITransform *Transform = nullptr; + const PathTransform *Transform = nullptr; public: StringTableOut() { @@ -180,7 +180,7 @@ class StringTableOut { // Table size zero is reserved to indicate no compression. Unique.insert(""); } - void setTransform(const URITransform *T) { Transform = T; } + void setTransform(const PathTransform *T) { Transform = T; } // Add a string to the table. Overwrites S if an identical string exists. // If path remapping is enabled, transform and store the new value. 
void intern(llvm::StringRef &S) { @@ -228,7 +228,8 @@ struct StringTableIn { }; llvm::Expected<StringTableIn> -readStringTable(llvm::StringRef Data, const URITransform *Transform = nullptr) { +readStringTable(llvm::StringRef Data, + const PathTransform *Transform = nullptr) { Reader R(Data); size_t UncompressedSize = R.consume32(); if (R.err()) @@ -478,7 +479,7 @@ readCompileCommand(Reader CmdReader, llvm::ArrayRef<llvm::StringRef> Strings) { constexpr static uint32_t Version = 20; llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data, SymbolOrigin Origin, - const URITransform *Transform) { + const PathTransform *Transform) { auto RIFF = riff::readFile(Data); if (!RIFF) return RIFF.takeError(); @@ -712,7 +713,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) { llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef Data, SymbolOrigin Origin, - const URITransform *Transform) { + const PathTransform *Transform) { if (Data.starts_with("RIFF")) { return readRIFF(Data, Origin, Transform); } diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index 50f958f8de88f..d94a075dbb235 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -35,8 +35,8 @@ namespace clang { namespace clangd { -// Used to remap URIs during serialization/deserialization -using URITransform = llvm::unique_function<std::string(llvm::StringRef) const>; +// Used to remap URIs and paths during serialization/deserialization. +using PathTransform = llvm::unique_function<std::string(llvm::StringRef) const>; enum class IndexFileFormat { RIFF, // Versioned binary format, suitable for production use. @@ -57,7 +57,7 @@ struct IndexFileIn { // If Transform is provided, use it to remap all URIs. 
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef, SymbolOrigin, - const URITransform *Transform = nullptr); + const PathTransform *Transform = nullptr); // Specifies the contents of an index file to be written. struct IndexFileOut { @@ -69,7 +69,7 @@ struct IndexFileOut { // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; const tooling::CompileCommand *Cmd = nullptr; - const URITransform *Transform = nullptr; + const PathTransform *Transform = nullptr; IndexFileOut() = default; IndexFileOut(const IndexFileIn &I) diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index cd00b535e5792..652554d98ae34 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -448,10 +448,10 @@ TEST(SerializationTest, NoCrashOnBadStringTableSize) { // An index is generated at /home/project. A second client at /workarea/project // loads and re-stores the shards. On-disk content always contains the // /home/project paths so the index remains portable. -TEST(SerializationTest, URITransformRoundTrip) { +TEST(SerializationTest, PathTransformRoundTrip) { // Store transform: map /workarea/project -> /home/project so that // on-disk content stays in the canonical /home/project paths. - URITransform StoreTransform = [](llvm::StringRef URI) -> std::string { + PathTransform StoreTransform = [](llvm::StringRef URI) -> std::string { std::string S = URI.str(); size_t Pos = S.find("/workarea/project/"); if (Pos != std::string::npos) @@ -460,7 +460,7 @@ TEST(SerializationTest, URITransformRoundTrip) { }; // Load transform: map /home/project -> /workarea/project so that // in-memory paths match the local filesystem. 
- URITransform LoadTransform = [](llvm::StringRef URI) -> std::string { + PathTransform LoadTransform = [](llvm::StringRef URI) -> std::string { std::string S = URI.str(); size_t Pos = S.find("/home/project/"); if (Pos != std::string::npos) >From 9f2d3374e6007ab9716e6b89a57491e6748ef9a3 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 13 Feb 2026 10:38:54 -0500 Subject: [PATCH 17/18] Add logging for all mappings on startup --- clang-tools-extra/clangd/tool/ClangdMain.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 71a45ad3da2d2..cc69b36cf8080 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -955,6 +955,8 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var return 1; } Opts.BackgroundIndexPathMappings = std::move(*Mappings); + for (const auto &M : Opts.BackgroundIndexPathMappings) + log("Background index path mapping: {0}", M); } Opts.ReferencesLimit = ReferencesLimit; Opts.Rename.LimitFiles = RenameFileLimit; >From cfff566803e534ff8b674d9090b7563001582838 Mon Sep 17 00:00:00 2001 From: Justin Cady <[email protected]> Date: Fri, 13 Feb 2026 13:38:04 -0500 Subject: [PATCH 18/18] Fix Windows path handling by operating on URIs This has the additional benefit of de-duplicating some logic that I introduced in a prior commit. This issue was detected by the Windows premerge check failing. 
--- clang-tools-extra/clangd/PathMapping.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clangd/PathMapping.cpp b/clang-tools-extra/clangd/PathMapping.cpp index 964ef6482608a..b1e879f17c7cc 100644 --- a/clang-tools-extra/clangd/PathMapping.cpp +++ b/clang-tools-extra/clangd/PathMapping.cpp @@ -48,16 +48,20 @@ std::optional<std::string> doPathMapping(llvm::StringRef S, std::optional<std::string> doFilePathMapping(llvm::StringRef FilePath, PathMapping::Direction Dir, const PathMappings &Mappings) { - for (const auto &Mapping : Mappings) { - const std::string &From = Dir == PathMapping::Direction::ClientToServer - ? Mapping.ClientPath - : Mapping.ServerPath; - const std::string &To = Dir == PathMapping::Direction::ClientToServer - ? Mapping.ServerPath - : Mapping.ClientPath; - if (FilePath.consume_front(From) && - (FilePath.empty() || FilePath.front() == '/')) - return (To + FilePath).str(); + // Convert the file path to a file:// URI, apply the mapping, then resolve + // back to a native path. Reuse doPathMapping to handle platform-specific + // path normalization and mapping logic in one place. + std::string URIStr = URI::createFile(FilePath).toString(); + if (auto Mapped = doPathMapping(URIStr, Dir, Mappings)) { + auto ParsedURI = URI::parse(*Mapped); + if (!ParsedURI) { + llvm::consumeError(ParsedURI.takeError()); + return std::nullopt; + } + if (auto Path = URI::resolve(*ParsedURI, FilePath)) + return std::move(*Path); + else + llvm::consumeError(Path.takeError()); } return std::nullopt; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
