https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/167280
>From da60e0a7b67c7ac6b8c641f3fe6e11df5dda7850 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Sun, 9 Nov 2025 23:19:56 -0800 Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.7 [skip ci] --- clang/lib/Basic/Diagnostic.cpp | 2 +- clang/lib/Basic/ProfileList.cpp | 2 +- clang/lib/Basic/SanitizerSpecialCaseList.cpp | 4 +- llvm/include/llvm/Support/SpecialCaseList.h | 51 ++++++----- llvm/lib/Support/SpecialCaseList.cpp | 93 +++++++++++--------- 5 files changed, 85 insertions(+), 67 deletions(-) diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 2dec26ecacf26..5e9da245e2b43 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -534,7 +534,7 @@ WarningsSpecialCaseList::create(const llvm::MemoryBuffer &Input, void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) { static constexpr auto WarningFlavor = clang::diag::Flavor::WarningOrError; for (const auto &SectionEntry : sections()) { - StringRef DiagGroup = SectionEntry.SectionStr; + StringRef DiagGroup = SectionEntry.name(); if (DiagGroup == "*") { // Drop the default section introduced by special case list, we only // support exact diagnostic group names. 
diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp index 9cb118893a0d9..8727057eb78d1 100644 --- a/clang/lib/Basic/ProfileList.cpp +++ b/clang/lib/Basic/ProfileList.cpp @@ -36,7 +36,7 @@ class ProfileSpecialCaseList : public llvm::SpecialCaseList { bool hasPrefix(StringRef Prefix) const { for (const auto &It : sections()) - if (It.Entries.count(Prefix) > 0) + if (It.hasPrefix(Prefix)) return true; return false; } diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp index 56f551628cf89..928c086898097 100644 --- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp +++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp @@ -42,7 +42,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() { SanitizerMask Mask; #define SANITIZER(NAME, ID) \ - if (S.SectionMatcher.matchAny(NAME)) \ + if (S.matchName(NAME)) \ Mask |= SanitizerKind::ID; #define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID) @@ -68,7 +68,7 @@ SanitizerSpecialCaseList::inSectionBlame(SanitizerMask Mask, StringRef Prefix, if (S.Mask & Mask) { unsigned LineNum = S.S.getLastMatch(Prefix, Query, Category); if (LineNum > 0) - return {S.S.FileIdx, LineNum}; + return {S.S.fileIndex(), LineNum}; } } return NotFound; diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index cb8e568de02e0..8c4dc94ae54ce 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -126,15 +126,16 @@ class SpecialCaseList { SpecialCaseList &operator=(SpecialCaseList const &) = delete; private: + using Match = std::pair<StringRef, unsigned>; + static constexpr Match NotMatched = {"", 0}; + // Lagacy v1 matcher. 
class RegexMatcher { public: LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber); LLVM_ABI void preprocess(bool BySize); - LLVM_ABI void - match(StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const; + LLVM_ABI Match match(StringRef Query) const; struct Reg { Reg(StringRef Name, unsigned LineNo, Regex &&Rg) @@ -152,9 +153,7 @@ class SpecialCaseList { LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber); LLVM_ABI void preprocess(bool BySize); - LLVM_ABI void - match(StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const; + LLVM_ABI Match match(StringRef Query) const; struct Glob { Glob(StringRef Name, unsigned LineNo, GlobPattern &&Pattern) @@ -168,11 +167,10 @@ class SpecialCaseList { RadixTree<iterator_range<StringRef::const_iterator>, RadixTree<iterator_range<StringRef::const_reverse_iterator>, - SmallVector<const GlobMatcher::Glob *, 1>>> + SmallVector<int, 1>>> PrefixSuffixToGlob; - RadixTree<iterator_range<StringRef::const_iterator>, - SmallVector<const GlobMatcher::Glob *, 1>> + RadixTree<iterator_range<StringRef::const_iterator>, SmallVector<int, 1>> SubstrToGlob; }; @@ -184,14 +182,10 @@ class SpecialCaseList { LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber); LLVM_ABI void preprocess(bool BySize); - LLVM_ABI void - match(StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const; + LLVM_ABI Match match(StringRef Query) const; LLVM_ABI bool matchAny(StringRef Query) const { - bool R = false; - match(Query, [&](StringRef, unsigned) { R = true; }); - return R; + return match(Query) != NotMatched; } std::variant<RegexMatcher, GlobMatcher> M; @@ -201,17 +195,22 @@ class SpecialCaseList { using SectionEntries = StringMap<StringMap<Matcher>>; protected: - struct Section { - Section(StringRef Str, unsigned FileIdx, bool UseGlobs) - : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false), SectionStr(Str), + class Section { + public: + 
Section(StringRef Name, unsigned FileIdx, bool UseGlobs) + : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false), Name(Name), FileIdx(FileIdx) {} Section(Section &&) = default; - Matcher SectionMatcher; - SectionEntries Entries; - std::string SectionStr; - unsigned FileIdx; + // Return name of the section, it's entire string in []. + StringRef name() const { return Name; } + + // Returns true of string 'Name' matches section name interpreted as a glob. + LLVM_ABI bool matchName(StringRef Name) const; + + // Return sequence number of the file where this section is defined. + unsigned fileIndex() const { return FileIdx; } // Helper method to search by Prefix, Query, and Category. Returns // 1-based line number on which rule is defined, or 0 if there is no match. @@ -223,11 +222,19 @@ class SpecialCaseList { LLVM_ABI StringRef getLongestMatch(StringRef Prefix, StringRef Query, StringRef Category) const; + /// Returns true if the section has any entries for the given prefix. + LLVM_ABI bool hasPrefix(StringRef Prefix) const; + private: friend class SpecialCaseList; LLVM_ABI void preprocess(bool OrderBySize); LLVM_ABI const SpecialCaseList::Matcher * findMatcher(StringRef Prefix, StringRef Category) const; + + Matcher SectionMatcher; + StringRef Name; + SectionEntries Entries; + unsigned FileIdx; }; ArrayRef<const Section> sections() const { return Sections; } diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 246d90cce3a43..42c8933a43399 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -15,11 +15,13 @@ #include "llvm/Support/SpecialCaseList.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> #include <limits> #include 
<memory> @@ -63,12 +65,12 @@ void SpecialCaseList::RegexMatcher::preprocess(bool BySize) { } } -void SpecialCaseList::RegexMatcher::match( - StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { +SpecialCaseList::Match +SpecialCaseList::RegexMatcher::match(StringRef Query) const { for (const auto &R : reverse(RegExes)) if (R.Rg.match(Query)) - return Cb(R.Name, R.LineNo); + return {R.Name, R.LineNo}; + return NotMatched; } Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern, @@ -90,7 +92,7 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { }); } - for (const auto &G : reverse(Globs)) { + for (const auto &[Idx, G] : enumerate(Globs)) { StringRef Prefix = G.Pattern.prefix(); StringRef Suffix = G.Pattern.suffix(); @@ -102,26 +104,29 @@ void SpecialCaseList::GlobMatcher::preprocess(bool BySize) { // But only if substring is not empty. Searching this tree is more // expensive. auto &V = SubstrToGlob.emplace(Substr).first->second; - V.emplace_back(&G); + V.emplace_back(Idx); continue; } } auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second; auto &V = SToGlob.emplace(reverse(Suffix)).first->second; - V.emplace_back(&G); + V.emplace_back(Idx); } } -void SpecialCaseList::GlobMatcher::match( - StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { +SpecialCaseList::Match +SpecialCaseList::GlobMatcher::match(StringRef Query) const { + int Best = -1; if (!PrefixSuffixToGlob.empty()) { for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Query)) { for (const auto &[_, V] : SToGlob.find_prefixes(reverse(Query))) { - for (const auto *G : V) { - if (G->Pattern.match(Query)) { - Cb(G->Name, G->LineNo); + for (int Idx : reverse(V)) { + if (Best > Idx) + break; + const GlobMatcher::Glob &G = Globs[Idx]; + if (G.Pattern.match(Query)) { + Best = Idx; // As soon as we find a match in the vector, we can break for this // vector, since the globs are already sorted by 
priority within the // prefix group. However, we continue searching other prefix groups @@ -138,9 +143,12 @@ void SpecialCaseList::GlobMatcher::match( // possibilities. In most cases search will fail on first characters. for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) { for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) { - for (const auto *G : V) { - if (G->Pattern.match(Query)) { - Cb(G->Name, G->LineNo); + for (int Idx : reverse(V)) { + if (Best > Idx) + break; + const GlobMatcher::Glob &G = Globs[Idx]; + if (G.Pattern.match(Query)) { + Best = Idx; // As soon as we find a match in the vector, we can break for this // vector, since the globs are already sorted by priority within the // prefix group. However, we continue searching other prefix groups @@ -151,6 +159,9 @@ void SpecialCaseList::GlobMatcher::match( } } } + if (Best < 0) + return NotMatched; + return {Globs[Best].Name, Globs[Best].LineNo}; } SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) @@ -169,12 +180,11 @@ void SpecialCaseList::Matcher::preprocess(bool BySize) { return std::visit([&](auto &V) { return V.preprocess(BySize); }, M); } -void SpecialCaseList::Matcher::match( - StringRef Query, - llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const { +SpecialCaseList::Match SpecialCaseList::Matcher::match(StringRef Query) const { if (RemoveDotSlash) Query = llvm::sys::path::remove_leading_dotslash(Query); - return std::visit([&](auto &V) { return V.match(Query, Cb); }, M); + return std::visit( + [&](auto &V) -> SpecialCaseList::Match { return V.match(Query); }, M); } // TODO: Refactor this to return Expected<...> @@ -233,10 +243,10 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB, std::string &Error, Expected<SpecialCaseList::Section *> SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, unsigned LineNo, bool UseGlobs) { + SectionStr = SectionStr.copy(StrAlloc); Sections.emplace_back(SectionStr, FileNo, UseGlobs); auto 
&Section = Sections.back(); - SectionStr = SectionStr.copy(StrAlloc); if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) { return createStringError(errc::invalid_argument, "malformed section at line " + Twine(LineNo) + @@ -264,11 +274,12 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, bool RemoveDotSlash = Version > 2; - Section *CurrentSection; - if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) { + auto ErrOrSection = addSection("*", FileIdx, 1, true); + if (auto Err = ErrOrSection.takeError()) { Error = toString(std::move(Err)); return false; } + Section *CurrentSection = ErrOrSection.get(); // This is the current list of prefixes for all existing users matching file // path. We may need parametrization in constructor in future. @@ -290,12 +301,13 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, return false; } - if (auto Err = addSection(Line.drop_front().drop_back(), FileIdx, LineNo, - UseGlobs) - .moveInto(CurrentSection)) { + auto ErrOrSection = + addSection(Line.drop_front().drop_back(), FileIdx, LineNo, UseGlobs); + if (auto Err = ErrOrSection.takeError()) { Error = toString(std::move(Err)); return false; } + CurrentSection = ErrOrSection.get(); continue; } @@ -348,6 +360,10 @@ SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, return NotFound; } +bool SpecialCaseList::Section::matchName(StringRef Name) const { + return SectionMatcher.matchAny(Name); +} + const SpecialCaseList::Matcher * SpecialCaseList::Section::findMatcher(StringRef Prefix, StringRef Category) const { @@ -371,26 +387,21 @@ LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) { unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const { - unsigned LastLine = 0; - if (const Matcher *M = findMatcher(Prefix, Category)) { - M->match(Query, [&](StringRef, unsigned LineNo) { - LastLine = std::max(LastLine, LineNo); - }); - 
} - return LastLine; + if (const Matcher *M = findMatcher(Prefix, Category)) + return M->match(Query).second; + return 0; } StringRef SpecialCaseList::Section::getLongestMatch(StringRef Prefix, StringRef Query, StringRef Category) const { - StringRef LongestRule; - if (const Matcher *M = findMatcher(Prefix, Category)) { - M->match(Query, [&](StringRef Rule, unsigned) { - if (LongestRule.size() < Rule.size()) - LongestRule = Rule; - }); - } - return LongestRule; + if (const Matcher *M = findMatcher(Prefix, Category)) + return M->match(Query).first; + return {}; +} + +bool SpecialCaseList::Section::hasPrefix(StringRef Prefix) const { + return Entries.find(Prefix) != Entries.end(); } } // namespace llvm >From 51826b0356ac42159df4bc66a9588816997adff0 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Mon, 10 Nov 2025 18:29:16 -0800 Subject: [PATCH 2/5] Update llvm/lib/Support/SpecialCaseList.cpp Co-authored-by: Copilot <[email protected]> --- llvm/lib/Support/SpecialCaseList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index cf01f59fa37f6..3e959b490d200 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -41,7 +41,7 @@ namespace { using Match = std::pair<StringRef, unsigned>; static constexpr Match NotMatched = {"", 0}; -// Lagacy v1 matcher. +// Legacy v1 matcher. 
class RegexMatcher { public: Error insert(StringRef Pattern, unsigned LineNumber); >From 93874c3244e3afea5db4dc886347bc2e53ed6fbd Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Mon, 10 Nov 2025 18:30:43 -0800 Subject: [PATCH 3/5] rebase Created using spr 1.3.7 --- llvm/lib/Support/SpecialCaseList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 3e959b490d200..cf01f59fa37f6 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -41,7 +41,7 @@ namespace { using Match = std::pair<StringRef, unsigned>; static constexpr Match NotMatched = {"", 0}; -// Legacy v1 matcher. +// Lagacy v1 matcher. class RegexMatcher { public: Error insert(StringRef Pattern, unsigned LineNumber); >From 1b2dbb89ded3e9921a3ca0a29560e57d24111e1d Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Mon, 10 Nov 2025 19:45:32 -0800 Subject: [PATCH 4/5] remove redundunt methods Created using spr 1.3.7 --- llvm/lib/Support/SpecialCaseList.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index cf01f59fa37f6..7baca223171d7 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -253,28 +253,17 @@ Match Matcher::match(StringRef Query) const { } // namespace class SpecialCaseList::Section::SectionImpl { - friend class SpecialCaseList; +public: void preprocess(bool OrderBySize); const Matcher *findMatcher(StringRef Prefix, StringRef Category) const; -public: using SectionEntries = StringMap<StringMap<Matcher>>; - SectionImpl(StringRef Str, bool UseGlobs) + explicit SectionImpl(bool UseGlobs) : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false) {} Matcher SectionMatcher; SectionEntries Entries; - - // Helper method to search by Prefix, Query, and Category. 
Returns - // 1-based line number on which rule is defined, or 0 if there is no match. - unsigned getLastMatch(StringRef Prefix, StringRef Query, - StringRef Category) const; - - // Helper method to search by Prefix, Query, and Category. Returns - // matching rule, or empty string if there is no match. - StringRef getLongestMatch(StringRef Prefix, StringRef Query, - StringRef Category) const; }; // TODO: Refactor this to return Expected<...> @@ -457,9 +446,10 @@ SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, SpecialCaseList::Section::Section(StringRef Str, unsigned FileIdx, bool UseGlobs) : Name(Str), FileIdx(FileIdx), - Impl(std::make_unique<SectionImpl>(Str, UseGlobs)) {} + Impl(std::make_unique<SectionImpl>(UseGlobs)) {} SpecialCaseList::Section::Section(Section &&) = default; + SpecialCaseList::Section::~Section() = default; bool SpecialCaseList::Section::matchName(StringRef Name) const { >From 281882459de640756ade6261aa1ccd49bdec576b Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Mon, 10 Nov 2025 19:51:55 -0800 Subject: [PATCH 5/5] update Created using spr 1.3.7 --- llvm/include/llvm/Support/SpecialCaseList.h | 2 +- llvm/lib/Support/SpecialCaseList.cpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index 8fc410ec44c85..5a012cf0c0264 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -154,7 +154,7 @@ class SpecialCaseList { std::unique_ptr<SectionImpl> Impl; }; - const std::vector<Section> &sections() const; + ArrayRef<const Section> sections() const { return Sections; } private: BumpPtrAllocator StrAlloc; diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 7baca223171d7..91f98cf7fac6c 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -319,10 +319,6 @@ bool 
SpecialCaseList::createInternal(const MemoryBuffer *MB, std::string &Error, return true; } -const std::vector<SpecialCaseList::Section> &SpecialCaseList::sections() const { - return Sections; -} - Expected<SpecialCaseList::Section *> SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, unsigned LineNo, bool UseGlobs) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
