[clang-tools-extra] [clang-tidy][NFC] Add `findTokenInRange` and reuse it (PR #183941)

via cfe-commits Sat, 28 Feb 2026 11:59:34 -0800

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-tools-extra

Author: Daniil Dudkin (unterumarmung)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/183941.diff


4 Files Affected:

- (modified) clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp 
(+7-34) 
- (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.cpp (+53) 
- (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.h (+8) 
- (modified) clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp (+206) 


``````````diff
diff --git a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp 
b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
index ac604b7b9f1b4..ba3af5762f27e 100644
--- a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
@@ -7,10 +7,10 @@
 
//===----------------------------------------------------------------------===//
 
 #include "ExplicitConstructorCheck.h"
+#include "../utils/LexerUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Lex/Lexer.h"
 
 using namespace clang::ast_matchers;
 
@@ -31,32 +31,6 @@ void ExplicitConstructorCheck::registerMatchers(MatchFinder 
*Finder) {
       this);
 }
 
-// Looks for the token matching the predicate and returns the range of the 
found
-// token including trailing whitespace.
-static SourceRange findToken(const SourceManager &Sources,
-                             const LangOptions &LangOpts,
-                             SourceLocation StartLoc, SourceLocation EndLoc,
-                             bool (*Pred)(const Token &)) {
-  if (StartLoc.isMacroID() || EndLoc.isMacroID())
-    return {};
-  const FileID File = Sources.getFileID(Sources.getSpellingLoc(StartLoc));
-  const StringRef Buf = Sources.getBufferData(File);
-  const char *StartChar = Sources.getCharacterData(StartLoc);
-  Lexer Lex(StartLoc, LangOpts, StartChar, StartChar, Buf.end());
-  Lex.SetCommentRetentionState(true);
-  Token Tok;
-  do {
-    Lex.LexFromRawLexer(Tok);
-    if (Pred(Tok)) {
-      Token NextTok;
-      Lex.LexFromRawLexer(NextTok);
-      return {Tok.getLocation(), NextTok.getLocation()};
-    }
-  } while (Tok.isNot(tok::eof) && Tok.getLocation() < EndLoc);
-
-  return {};
-}
-
 static bool declIsStdInitializerList(const NamedDecl *D) {
   // First use the fast getName() method to avoid unnecessary calls to the
   // slow getQualifiedNameAsString().
@@ -113,9 +87,10 @@ void ExplicitConstructorCheck::check(const 
MatchFinder::MatchResult &Result) {
       return Tok.is(tok::raw_identifier) &&
              Tok.getRawIdentifier() == "explicit";
     };
-    const SourceRange ExplicitTokenRange =
-        findToken(*Result.SourceManager, getLangOpts(),
-                  Ctor->getOuterLocStart(), Ctor->getEndLoc(), IsKwExplicit);
+    const CharSourceRange ConstructorRange = CharSourceRange::getTokenRange(
+        Ctor->getOuterLocStart(), Ctor->getEndLoc());
+    const CharSourceRange ExplicitTokenRange = utils::lexer::findTokenInRange(
+        ConstructorRange, *Result.SourceManager, getLangOpts(), IsKwExplicit);
     StringRef ConstructorDescription;
     if (Ctor->isMoveConstructor())
       ConstructorDescription = "move";
@@ -127,10 +102,8 @@ void ExplicitConstructorCheck::check(const 
MatchFinder::MatchResult &Result) {
     auto Diag = diag(Ctor->getLocation(),
                      "%0 constructor should not be declared explicit")
                 << ConstructorDescription;
-    if (ExplicitTokenRange.isValid()) {
-      Diag << FixItHint::CreateRemoval(
-          CharSourceRange::getCharRange(ExplicitTokenRange));
-    }
+    if (ExplicitTokenRange.isValid())
+      Diag << FixItHint::CreateRemoval(ExplicitTokenRange);
     return;
   }
 
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp 
b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index a9a8c7bbf4c89..e3b904cd1d29a 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -160,6 +160,59 @@ getTrailingCommentsInRange(CharSourceRange Range, const 
SourceManager &SM,
   return Comments;
 }
 
+CharSourceRange findTokenInRange(CharSourceRange Range, const SourceManager 
&SM,
+                                 const LangOptions &LangOpts,
+                                 llvm::function_ref<bool(const Token &)> Pred) 
{
+  if (Range.isInvalid())
+    return {};
+
+  // Normalize to a file-based char range so raw lexing can operate on one
+  // contiguous buffer and reject unmappable (e.g. macro) ranges.
+  const CharSourceRange FileRange =
+      Lexer::makeFileCharRange(Range, SM, LangOpts);
+  if (FileRange.isInvalid())
+    return {};
+
+  const auto [BeginFID, BeginOffset] =
+      SM.getDecomposedLoc(FileRange.getBegin());
+  const auto [EndFID, EndOffset] = SM.getDecomposedLoc(FileRange.getEnd());
+  if (BeginFID != EndFID || BeginOffset > EndOffset)
+    return {};
+
+  bool Invalid = false;
+  const StringRef Buffer = SM.getBufferData(BeginFID, &Invalid);
+  if (Invalid)
+    return {};
+
+  const char *LexStart = Buffer.data() + BeginOffset;
+  // Re-lex raw tokens in the bounded file buffer while preserving comments so
+  // callers can match tokens regardless of interleaved comments.
+  Lexer TheLexer(SM.getLocForStartOfFile(BeginFID), LangOpts, Buffer.begin(),
+                 LexStart, Buffer.end());
+  TheLexer.SetCommentRetentionState(true);
+
+  while (true) {
+    Token Tok;
+    if (TheLexer.LexFromRawLexer(Tok))
+      return {};
+
+    if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() ||
+        SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation()))
+      return {};
+
+    if (!Pred(Tok))
+      continue;
+
+    Token NextTok;
+    if (TheLexer.LexFromRawLexer(NextTok))
+      return {};
+    // Return a char range ending at the next token start so trailing trivia of
+    // the matched token is included (useful for fix-it removals).
+    return CharSourceRange::getCharRange(Tok.getLocation(),
+                                         NextTok.getLocation());
+  }
+}
+
 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
                                         CharSourceRange Range,
                                         const ASTContext &Context,
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h 
b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
index 38123ae14cff7..bd72a876bf71f 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
@@ -12,6 +12,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include <optional>
 #include <utility>
 #include <vector>
@@ -126,6 +127,13 @@ std::vector<CommentToken>
 getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
                            const LangOptions &LangOpts);
 
+/// Returns the first token in \p Range matching \p Pred.
+/// The returned char range starts at the matched token and ends at the start
+/// of the next token. Returns invalid range if no token matches.
+CharSourceRange findTokenInRange(CharSourceRange Range, const SourceManager 
&SM,
+                                 const LangOptions &LangOpts,
+                                 llvm::function_ref<bool(const Token &)> Pred);
+
 /// Assuming that ``Range`` spans a CVR-qualified type, returns the
 /// token in ``Range`` that is responsible for the qualification. ``Range``
 /// must be valid with respect to ``SM``.  Returns ``std::nullopt`` if no
diff --git a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp 
b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
index 438a78b4694ee..aa52424812fe6 100644
--- a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
@@ -8,6 +8,7 @@
 
 #include "../clang-tidy/utils/LexerUtils.h"
 
+#include "clang/AST/DeclCXX.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Frontend/ASTUnit.h"
@@ -41,8 +42,213 @@ static CharSourceRange rangeFromAnnotations(const 
llvm::Annotations &A,
   return CharSourceRange::getCharRange(Begin, End);
 }
 
+static bool isRawIdentifierNamed(const Token &Tok, StringRef Name) {
+  return Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name;
+}
+
+static const CXXConstructorDecl *
+findFirstNonImplicitConstructor(const ASTContext &Context) {
+  for (const Decl *D : Context.getTranslationUnitDecl()->decls()) {
+    const auto *RD = dyn_cast<CXXRecordDecl>(D);
+    if (!RD)
+      continue;
+    for (const CXXConstructorDecl *Ctor : RD->ctors())
+      if (!Ctor->isImplicit())
+        return Ctor;
+  }
+  return nullptr;
+}
+
 namespace {
 
+TEST(LexerUtilsTest, FindTokenInRangeFindsMatch) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[explicit   ]] S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  ASSERT_TRUE(MatchedRange.isValid());
+
+  const StringRef CodeText = Code.code();
+  const size_t ExplicitOffset = CodeText.find("explicit");
+  ASSERT_NE(StringRef::npos, ExplicitOffset);
+  const size_t ConstructorOffset = CodeText.find("S(int)");
+  ASSERT_NE(StringRef::npos, ConstructorOffset);
+  EXPECT_EQ(ExplicitOffset, SM.getFileOffset(MatchedRange.getBegin()));
+  EXPECT_EQ(ConstructorOffset, SM.getFileOffset(MatchedRange.getEnd()));
+}
+
+TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidWhenNotFound) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[int x = 0;]]
+  S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenInRangeDoesNotMatchTokenAtEndBoundary) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[int x = 0; ]]explicit S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidWhenPredicateNeverMatches) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[explicit ]] S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      SearchRange, SM, LangOpts, [](const Token &) { return false; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidForInvalidRange) {
+  std::unique_ptr<ASTUnit> AST = buildAST("struct S { explicit S(int); };");
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      CharSourceRange(), SM, LangOpts, [](const Token &) { return true; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidForReversedOffsets) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $a^explicit S(int);$b^
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const SourceLocation MainFileStart =
+      SM.getLocForStartOfFile(SM.getMainFileID());
+  const SourceLocation Begin = MainFileStart.getLocWithOffset(Code.point("b"));
+  const SourceLocation End = MainFileStart.getLocWithOffset(Code.point("a"));
+  ASSERT_TRUE(SM.isBeforeInTranslationUnit(End, Begin));
+
+  const CharSourceRange ReversedRange =
+      CharSourceRange::getCharRange(Begin, End);
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      ReversedRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidWhenFileRangeIsInvalid) {
+  llvm::Annotations Code(R"cpp(
+#include "header.h"
+int $begin^main_var = 0;
+)cpp");
+  const FileContentMappings Mappings = {
+      {"header.h", "int header_var = 0;\n"},
+  };
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code(), Mappings);
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const SourceLocation MainFileStart =
+      SM.getLocForStartOfFile(SM.getMainFileID());
+  const SourceLocation Begin =
+      MainFileStart.getLocWithOffset(Code.point("begin"));
+  ASSERT_TRUE(Begin.isFileID());
+
+  auto HeaderFile = AST->getFileManager().getOptionalFileRef("header.h");
+  ASSERT_TRUE(HeaderFile.has_value());
+  const FileID HeaderFID = SM.translateFile(*HeaderFile);
+  ASSERT_TRUE(HeaderFID.isValid());
+  const SourceLocation HeaderBegin = SM.getLocForStartOfFile(HeaderFID);
+  ASSERT_TRUE(HeaderBegin.isFileID());
+
+  const CharSourceRange SearchRange =
+      CharSourceRange::getCharRange(Begin, HeaderBegin);
+  const CharSourceRange FileRange =
+      Lexer::makeFileCharRange(SearchRange, SM, LangOpts);
+  EXPECT_TRUE(FileRange.isInvalid());
+
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      SearchRange, SM, LangOpts, [](const Token &) { return true; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidForMacroRange) {
+  std::unique_ptr<ASTUnit> AST = buildAST(R"cpp(
+#define EXPLICIT explicit
+struct S {
+  EXPLICIT S(int);
+};
+)cpp");
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CXXConstructorDecl *Ctor = findFirstNonImplicitConstructor(Context);
+  ASSERT_NE(nullptr, Ctor);
+  ASSERT_TRUE(Ctor->getOuterLocStart().isMacroID());
+
+  const CharSourceRange SearchRange = CharSourceRange::getTokenRange(
+      Ctor->getOuterLocStart(), Ctor->getEndLoc());
+  const CharSourceRange MatchedRange = utils::lexer::findTokenInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
 TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) {
   llvm::Annotations Code(R"cpp(
 void f() {

``````````

</details>


https://github.com/llvm/llvm-project/pull/183941
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang-tools-extra] [clang-tidy][NFC] Add `findTokenInRange` and reuse it (PR #183941)

Reply via email to