https://github.com/Sirraide updated https://github.com/llvm/llvm-project/pull/143520
>From 15c0a79d6a0cd65d88fbe152275b224201e632a1 Mon Sep 17 00:00:00 2001 From: Sirraide <aeternalm...@gmail.com> Date: Tue, 10 Jun 2025 14:32:32 +0200 Subject: [PATCH 1/6] [Clang] [Diagnostics] Simplify filenames that contain '..' --- clang/include/clang/Frontend/TextDiagnostic.h | 1 + clang/lib/Frontend/TextDiagnostic.cpp | 32 ++++++++++++------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index e2e88d4d648a2..9c77bc3e00e19 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -35,6 +35,7 @@ namespace clang { class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; const Preprocessor *PP; + llvm::StringMap<SmallString<128>> SimplifiedFileNameCache; public: TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index b9e681b52e509..edbad42b39950 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -738,12 +738,20 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS, } void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { -#ifdef _WIN32 - SmallString<4096> TmpFilename; -#endif - if (DiagOpts.AbsolutePath) { - auto File = SM.getFileManager().getOptionalFileRef(Filename); - if (File) { + auto File = SM.getFileManager().getOptionalFileRef(Filename); + + // Try to simplify paths that contain '..' in any case since paths to + // standard library headers especially tend to get quite long otherwise. + // Only do that for local filesystems though to avoid slowing down + // compilation too much. + auto AlwaysSimplify = [&] { + return File->getName().contains("..") && + llvm::sys::fs::is_local(File->getName()); + }; + + if (File && (DiagOpts.AbsolutePath || AlwaysSimplify())) { + SmallString<128> &CacheEntry = SimplifiedFileNameCache[Filename]; + if (CacheEntry.empty()) { // We want to print a simplified absolute path, i. e. without "dots". // // The hardest part here are the paths like "<part1>/<link>/../<part2>". @@ -759,15 +767,15 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { // on Windows we can just use llvm::sys::path::remove_dots(), because, // on that system, both aforementioned paths point to the same place. #ifdef _WIN32 - TmpFilename = File->getName(); - llvm::sys::fs::make_absolute(TmpFilename); - llvm::sys::path::native(TmpFilename); - llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true); - Filename = StringRef(TmpFilename.data(), TmpFilename.size()); + CacheEntry = File->getName(); + llvm::sys::fs::make_absolute(CacheEntry); + llvm::sys::path::native(CacheEntry); + llvm::sys::path::remove_dots(CacheEntry, /* remove_dot_dot */ true); #else - Filename = SM.getFileManager().getCanonicalName(*File); + CacheEntry = SM.getFileManager().getCanonicalName(*File); #endif } + Filename = CacheEntry; } OS << Filename; >From 97c0accf4fb3dc983bbb9f344ad66d0281b231fb Mon Sep 17 00:00:00 2001 From: Sirraide <aeternalm...@gmail.com> Date: Tue, 10 Jun 2025 14:47:43 +0200 Subject: [PATCH 2/6] Use whichever path ends up being shorter --- clang/lib/Frontend/TextDiagnostic.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index edbad42b39950..b77c1797c51c5 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -774,6 +774,12 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { #else CacheEntry = SM.getFileManager().getCanonicalName(*File); #endif + + // In some cases, the resolved path may actually end up being longer (e.g. + // if it was originally a relative path), so just retain whichever one + // ends up being shorter. + if (!DiagOpts.AbsolutePath && CacheEntry.size() > Filename.size()) + CacheEntry = Filename; } Filename = CacheEntry; } >From 529aac1a9ae4969b4389156cb5d44fcecd445cbc Mon Sep 17 00:00:00 2001 From: Sirraide <aeternalm...@gmail.com> Date: Tue, 10 Jun 2025 20:36:14 +0200 Subject: [PATCH 3/6] Move path computation into SourceManager --- clang/include/clang/Basic/SourceManager.h | 9 ++++ clang/lib/Basic/SourceManager.cpp | 55 +++++++++++++++++++++++ clang/lib/Frontend/SARIFDiagnostic.cpp | 31 +------------ clang/lib/Frontend/TextDiagnostic.cpp | 48 +------------------- 4 files changed, 66 insertions(+), 77 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index cd3dac9133223..92080e7891eca 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -820,6 +820,12 @@ class SourceManager : public RefCountedBase<SourceManager> { mutable std::unique_ptr<SrcMgr::SLocEntry> FakeSLocEntryForRecovery; + /// Cache for filenames used in diagnostics. See 'getNameForDiagnostic()'. + mutable llvm::StringMap<StringRef> DiagNames; + + /// Allocator for absolute/short names. + mutable llvm::BumpPtrAllocator DiagNameAlloc; + /// Lazily computed map of macro argument chunks to their expanded /// source location. using MacroArgsMap = std::map<unsigned, SourceLocation>; @@ -1847,6 +1853,9 @@ class SourceManager : public RefCountedBase<SourceManager> { /// \return Location of the top-level macro caller. SourceLocation getTopMacroCallerLoc(SourceLocation Loc) const; + /// Retrieve the name of a file suitable for diagnostics. + StringRef getNameForDiagnostic(StringRef Filename) const; + private: friend class ASTReader; friend class ASTWriter; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 09e5c6547fb51..c726f86e2d4e0 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -2403,3 +2403,58 @@ SourceManagerForFile::SourceManagerForFile(StringRef FileName, assert(ID.isValid()); SourceMgr->setMainFileID(ID); } + +StringRef SourceManager::getNameForDiagnostic(StringRef Filename) const { + OptionalFileEntryRef File = getFileManager().getOptionalFileRef(Filename); + if (!File) + return Filename; + + // Try to simplify paths that contain '..' in any case since paths to + // standard library headers especially tend to get quite long otherwise. + // Only do that for local filesystems though to avoid slowing down + // compilation too much. + bool Absolute = Diag.getDiagnosticOptions().AbsolutePath; + bool AlwaysSimplify = File->getName().contains("..") && + llvm::sys::fs::is_local(File->getName()); + if (!Absolute && !AlwaysSimplify) + return Filename; + + // This may involve computing canonical names, so cache the result. + StringRef &CacheEntry = DiagNames[Filename]; + if (!CacheEntry.empty()) + return CacheEntry; + + // We want to print a simplified absolute path, i. e. without "dots". + // + // The hardest part here are the paths like "<part1>/<link>/../<part2>". + // On Unix-like systems, we cannot just collapse "<link>/..", because + // paths are resolved sequentially, and, thereby, the path + // "<part1>/<part2>" may point to a different location. That is why + // we use FileManager::getCanonicalName(), which expands all indirections + // with llvm::sys::fs::real_path() and caches the result. + // + // On the other hand, it would be better to preserve as much of the + // original path as possible, because that helps a user to recognize it. + // real_path() expands all links, which sometimes too much. Luckily, + // on Windows we can just use llvm::sys::path::remove_dots(), because, + // on that system, both aforementioned paths point to the same place. + SmallString<256> TempBuf; +#ifdef _WIN32 + TempBuf = File->getName(); + llvm::sys::fs::make_absolute(TempBuf); + llvm::sys::path::native(TempBuf); + llvm::sys::path::remove_dots(TempBuf, /* remove_dot_dot */ true); +#else + TempBuf = getFileManager().getCanonicalName(*File); +#endif + + // In some cases, the resolved path may actually end up being longer (e.g. + // if it was originally a relative path), so just retain whichever one + // ends up being shorter. + if (!Absolute && TempBuf.size() > Filename.size()) + CacheEntry = Filename; + else + CacheEntry = TempBuf.str().copy(DiagNameAlloc); + + return CacheEntry; +} diff --git a/clang/lib/Frontend/SARIFDiagnostic.cpp b/clang/lib/Frontend/SARIFDiagnostic.cpp index e2aec7f677f12..c2f6ece2e91ff 100644 --- a/clang/lib/Frontend/SARIFDiagnostic.cpp +++ b/clang/lib/Frontend/SARIFDiagnostic.cpp @@ -163,36 +163,7 @@ SARIFDiagnostic::addDiagnosticLevelToRule(SarifRule Rule, llvm::StringRef SARIFDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { - if (DiagOpts.AbsolutePath) { - auto File = SM.getFileManager().getOptionalFileRef(Filename); - if (File) { - // We want to print a simplified absolute path, i. e. without "dots". - // - // The hardest part here are the paths like "<part1>/<link>/../<part2>". - // On Unix-like systems, we cannot just collapse "<link>/..", because - // paths are resolved sequentially, and, thereby, the path - // "<part1>/<part2>" may point to a different location. That is why - // we use FileManager::getCanonicalName(), which expands all indirections - // with llvm::sys::fs::real_path() and caches the result. - // - // On the other hand, it would be better to preserve as much of the - // original path as possible, because that helps a user to recognize it. - // real_path() expands all links, which is sometimes too much. Luckily, - // on Windows we can just use llvm::sys::path::remove_dots(), because, - // on that system, both aforementioned paths point to the same place. -#ifdef _WIN32 - SmallString<256> TmpFilename = File->getName(); - llvm::sys::fs::make_absolute(TmpFilename); - llvm::sys::path::native(TmpFilename); - llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true); - Filename = StringRef(TmpFilename.data(), TmpFilename.size()); -#else - Filename = SM.getFileManager().getCanonicalName(*File); -#endif - } - } - - return Filename; + return SM.getNameForDiagnostic(Filename); } /// Print out the file/line/column information and include trace. diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index b77c1797c51c5..354331f920aa4 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -738,53 +738,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS, } void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { - auto File = SM.getFileManager().getOptionalFileRef(Filename); - - // Try to simplify paths that contain '..' in any case since paths to - // standard library headers especially tend to get quite long otherwise. - // Only do that for local filesystems though to avoid slowing down - // compilation too much. - auto AlwaysSimplify = [&] { - return File->getName().contains("..") && - llvm::sys::fs::is_local(File->getName()); - }; - - if (File && (DiagOpts.AbsolutePath || AlwaysSimplify())) { - SmallString<128> &CacheEntry = SimplifiedFileNameCache[Filename]; - if (CacheEntry.empty()) { - // We want to print a simplified absolute path, i. e. without "dots". - // - // The hardest part here are the paths like "<part1>/<link>/../<part2>". - // On Unix-like systems, we cannot just collapse "<link>/..", because - // paths are resolved sequentially, and, thereby, the path - // "<part1>/<part2>" may point to a different location. That is why - // we use FileManager::getCanonicalName(), which expands all indirections - // with llvm::sys::fs::real_path() and caches the result. - // - // On the other hand, it would be better to preserve as much of the - // original path as possible, because that helps a user to recognize it. - // real_path() expands all links, which sometimes too much. Luckily, - // on Windows we can just use llvm::sys::path::remove_dots(), because, - // on that system, both aforementioned paths point to the same place. -#ifdef _WIN32 - CacheEntry = File->getName(); - llvm::sys::fs::make_absolute(CacheEntry); - llvm::sys::path::native(CacheEntry); - llvm::sys::path::remove_dots(CacheEntry, /* remove_dot_dot */ true); -#else - CacheEntry = SM.getFileManager().getCanonicalName(*File); -#endif - - // In some cases, the resolved path may actually end up being longer (e.g. - // if it was originally a relative path), so just retain whichever one - // ends up being shorter. - if (!DiagOpts.AbsolutePath && CacheEntry.size() > Filename.size()) - CacheEntry = Filename; - } - Filename = CacheEntry; - } - - OS << Filename; + OS << SM.getNameForDiagnostic(Filename); } /// Print out the file/line/column information and include trace. >From 5d308877424532874a06735707236474c03ddfab Mon Sep 17 00:00:00 2001 From: Sirraide <aeternalm...@gmail.com> Date: Fri, 13 Jun 2025 20:32:59 +0200 Subject: [PATCH 4/6] Fix crash and disable nfs check on windows --- clang/include/clang/Basic/SourceManager.h | 9 +++++- clang/lib/Basic/SourceManager.cpp | 37 +++++++++++++++++------ clang/lib/Frontend/SARIFDiagnostic.cpp | 2 +- clang/lib/Frontend/TextDiagnostic.cpp | 2 +- clang/test/Frontend/absolute-paths.c | 6 ++-- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 92080e7891eca..bbf38f40a9534 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -1854,7 +1854,14 @@ class SourceManager : public RefCountedBase<SourceManager> { SourceLocation getTopMacroCallerLoc(SourceLocation Loc) const; /// Retrieve the name of a file suitable for diagnostics. - StringRef getNameForDiagnostic(StringRef Filename) const; + // FIXME: Passing in the DiagnosticOptions here is a workaround for the + // fact that installapi does some weird things with DiagnosticsEngines, + // which causes the 'Diag' member of SourceManager (or at least the + // DiagnosticsOptions member thereof) to be a dangling reference + // sometimes. We should probably fix that or decouple the two classes + // to avoid this issue entirely. + StringRef getNameForDiagnostic(StringRef Filename, + const DiagnosticOptions &Opts) const; private: friend class ASTReader; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index c726f86e2d4e0..7fa638567c55c 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -2404,19 +2404,36 @@ SourceManagerForFile::SourceManagerForFile(StringRef FileName, SourceMgr->setMainFileID(ID); } -StringRef SourceManager::getNameForDiagnostic(StringRef Filename) const { +StringRef +SourceManager::getNameForDiagnostic(StringRef Filename, + const DiagnosticOptions &Opts) const { OptionalFileEntryRef File = getFileManager().getOptionalFileRef(Filename); if (!File) return Filename; - // Try to simplify paths that contain '..' in any case since paths to - // standard library headers especially tend to get quite long otherwise. - // Only do that for local filesystems though to avoid slowing down - // compilation too much. - bool Absolute = Diag.getDiagnosticOptions().AbsolutePath; - bool AlwaysSimplify = File->getName().contains("..") && - llvm::sys::fs::is_local(File->getName()); - if (!Absolute && !AlwaysSimplify) + bool SimplifyPath = [&] { + if (Opts.AbsolutePath) + return true; + + // Try to simplify paths that contain '..' in any case since paths to + // standard library headers especially tend to get quite long otherwise. + // Only do that for local filesystems though to avoid slowing down + // compilation too much. + if (!File->getName().contains("..")) + return false; + + // If we're not on Windows, check if we're on a network file system and + // avoid simplifying the path in that case since that can be slow. On + // Windows, the check for a local filesystem is already slow, so skip it. +#ifndef _WIN32 + if (!llvm::sys::fs::is_local(File->getName())) + return false; +#endif + + return true; + }(); + + if (!SimplifyPath) return Filename; // This may involve computing canonical names, so cache the result. @@ -2451,7 +2468,7 @@ StringRef SourceManager::getNameForDiagnostic(StringRef Filename) const { // In some cases, the resolved path may actually end up being longer (e.g. // if it was originally a relative path), so just retain whichever one // ends up being shorter. - if (!Absolute && TempBuf.size() > Filename.size()) + if (!Opts.AbsolutePath && TempBuf.size() > Filename.size()) CacheEntry = Filename; else CacheEntry = TempBuf.str().copy(DiagNameAlloc); diff --git a/clang/lib/Frontend/SARIFDiagnostic.cpp b/clang/lib/Frontend/SARIFDiagnostic.cpp index c2f6ece2e91ff..b63345857baf1 100644 --- a/clang/lib/Frontend/SARIFDiagnostic.cpp +++ b/clang/lib/Frontend/SARIFDiagnostic.cpp @@ -163,7 +163,7 @@ SARIFDiagnostic::addDiagnosticLevelToRule(SarifRule Rule, llvm::StringRef SARIFDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { - return SM.getNameForDiagnostic(Filename); + return SM.getNameForDiagnostic(Filename, DiagOpts); } /// Print out the file/line/column information and include trace. diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp index 354331f920aa4..174b4d30d2847 100644 --- a/clang/lib/Frontend/TextDiagnostic.cpp +++ b/clang/lib/Frontend/TextDiagnostic.cpp @@ -738,7 +738,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS, } void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { - OS << SM.getNameForDiagnostic(Filename); + OS << SM.getNameForDiagnostic(Filename, DiagOpts); } /// Print out the file/line/column information and include trace. diff --git a/clang/test/Frontend/absolute-paths.c b/clang/test/Frontend/absolute-paths.c index e06cf262dd8e2..e507f0d4c34b3 100644 --- a/clang/test/Frontend/absolute-paths.c +++ b/clang/test/Frontend/absolute-paths.c @@ -8,9 +8,9 @@ #include "absolute-paths.h" -// Check whether the diagnostic from the header above includes the dummy -// directory in the path. -// NORMAL: SystemHeaderPrefix +// Check that the bogus prefix we added is stripped out even if absolute paths +// are disabled. +// NORMAL-NOT: SystemHeaderPrefix // ABSOLUTE-NOT: SystemHeaderPrefix // CHECK: warning: non-void function does not return a value >From c0f56c3dc484962c0a2916fb652df1304381d274 Mon Sep 17 00:00:00 2001 From: Sirraide <aeternalm...@gmail.com> Date: Wed, 2 Jul 2025 12:50:37 +0200 Subject: [PATCH 5/6] remove unused member --- clang/include/clang/Frontend/TextDiagnostic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h index 9c77bc3e00e19..e2e88d4d648a2 100644 --- a/clang/include/clang/Frontend/TextDiagnostic.h +++ b/clang/include/clang/Frontend/TextDiagnostic.h @@ -35,7 +35,6 @@ namespace clang { class TextDiagnostic : public DiagnosticRenderer { raw_ostream &OS; const Preprocessor *PP; - llvm::StringMap<SmallString<128>> SimplifiedFileNameCache; public: TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, >From a91e71182639a5f0717d80334c41888b3f2a9fdc Mon Sep 17 00:00:00 2001 From: Sirraide <aeternalm...@gmail.com> Date: Fri, 4 Jul 2025 15:45:50 +0200 Subject: [PATCH 6/6] fix clang-tidy test --- .../clang-tidy/infrastructure/file-filter-symlinks.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/file-filter-symlinks.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/file-filter-symlinks.cpp index 7efa7d070f69f..30bcdc3f87743 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/file-filter-symlinks.cpp +++ b/clang-tools-extra/test/clang-tidy/infrastructure/file-filter-symlinks.cpp @@ -12,8 +12,9 @@ // RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='header\.h' %s -- -I %t 2>&1 | FileCheck --check-prefix=CHECK_HEADER %s // RUN: clang-tidy -checks='-*,google-explicit-constructor' -header-filter='header\.h' -quiet %s -- -I %t 2>&1 | FileCheck --check-prefix=CHECK_HEADER %s -// Check that `-header-filter` operates on the same file paths as paths in -// diagnostics printed by ClangTidy. +// `-header-filter` operates on the actual file path that the user provided in +// the #include directive; however, Clang's path name simplification causes the +// path to be printed in canonicalised form here. #include "dir1/dir2/../header_alias.h" -// CHECK_HEADER_ALIAS: dir1/dir2/../header_alias.h:1:11: warning: single-argument constructors +// CHECK_HEADER_ALIAS: dir1/header.h:1:11: warning: single-argument constructors // CHECK_HEADER-NOT: warning: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits