llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tools-extra Author: Daniil Dudkin (unterumarmung) <details> <summary>Changes</summary> --- Full diff: https://github.com/llvm/llvm-project/pull/183941.diff 4 Files Affected: - (modified) clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp (+7-34) - (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.cpp (+53) - (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.h (+8) - (modified) clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp (+206) ``````````diff diff --git a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp index ac604b7b9f1b4..ba3af5762f27e 100644 --- a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "ExplicitConstructorCheck.h" +#include "../utils/LexerUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Lex/Lexer.h" using namespace clang::ast_matchers; @@ -31,32 +31,6 @@ void ExplicitConstructorCheck::registerMatchers(MatchFinder *Finder) { this); } -// Looks for the token matching the predicate and returns the range of the found -// token including trailing whitespace. -static SourceRange findToken(const SourceManager &Sources, - const LangOptions &LangOpts, - SourceLocation StartLoc, SourceLocation EndLoc, - bool (*Pred)(const Token &)) { - if (StartLoc.isMacroID() || EndLoc.isMacroID()) - return {}; - const FileID File = Sources.getFileID(Sources.getSpellingLoc(StartLoc)); - const StringRef Buf = Sources.getBufferData(File); - const char *StartChar = Sources.getCharacterData(StartLoc); - Lexer Lex(StartLoc, LangOpts, StartChar, StartChar, Buf.end()); - Lex.SetCommentRetentionState(true); - Token Tok; - do { - Lex.LexFromRawLexer(Tok); - if (Pred(Tok)) { - Token NextTok; - Lex.LexFromRawLexer(NextTok); - return {Tok.getLocation(), NextTok.getLocation()}; - } - } while (Tok.isNot(tok::eof) && Tok.getLocation() < EndLoc); - - return {}; -} - static bool declIsStdInitializerList(const NamedDecl *D) { // First use the fast getName() method to avoid unnecessary calls to the // slow getQualifiedNameAsString(). @@ -113,9 +87,10 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) { return Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "explicit"; }; - const SourceRange ExplicitTokenRange = - findToken(*Result.SourceManager, getLangOpts(), - Ctor->getOuterLocStart(), Ctor->getEndLoc(), IsKwExplicit); + const CharSourceRange ConstructorRange = CharSourceRange::getTokenRange( + Ctor->getOuterLocStart(), Ctor->getEndLoc()); + const CharSourceRange ExplicitTokenRange = utils::lexer::findTokenInRange( + ConstructorRange, *Result.SourceManager, getLangOpts(), IsKwExplicit); StringRef ConstructorDescription; if (Ctor->isMoveConstructor()) ConstructorDescription = "move"; @@ -127,10 +102,8 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) { auto Diag = diag(Ctor->getLocation(), "%0 constructor should not be declared explicit") << ConstructorDescription; - if (ExplicitTokenRange.isValid()) { - Diag << FixItHint::CreateRemoval( - CharSourceRange::getCharRange(ExplicitTokenRange)); - } + if (ExplicitTokenRange.isValid()) + Diag << FixItHint::CreateRemoval(ExplicitTokenRange); return; } diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp index a9a8c7bbf4c89..e3b904cd1d29a 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -160,6 +160,59 @@ getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM, return Comments; } +CharSourceRange findTokenInRange(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts, + llvm::function_ref<bool(const Token &)> Pred) { + if (Range.isInvalid()) + return {}; + + // Normalize to a file-based char range so raw lexing can operate on one + // contiguous buffer and reject unmappable (e.g. macro) ranges. + const CharSourceRange FileRange = + Lexer::makeFileCharRange(Range, SM, LangOpts); + if (FileRange.isInvalid()) + return {}; + + const auto [BeginFID, BeginOffset] = + SM.getDecomposedLoc(FileRange.getBegin()); + const auto [EndFID, EndOffset] = SM.getDecomposedLoc(FileRange.getEnd()); + if (BeginFID != EndFID || BeginOffset > EndOffset) + return {}; + + bool Invalid = false; + const StringRef Buffer = SM.getBufferData(BeginFID, &Invalid); + if (Invalid) + return {}; + + const char *LexStart = Buffer.data() + BeginOffset; + // Re-lex raw tokens in the bounded file buffer while preserving comments so + // callers can match tokens regardless of interleaved comments. + Lexer TheLexer(SM.getLocForStartOfFile(BeginFID), LangOpts, Buffer.begin(), + LexStart, Buffer.end()); + TheLexer.SetCommentRetentionState(true); + + while (true) { + Token Tok; + if (TheLexer.LexFromRawLexer(Tok)) + return {}; + + if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() || + SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation())) + return {}; + + if (!Pred(Tok)) + continue; + + Token NextTok; + if (TheLexer.LexFromRawLexer(NextTok)) + return {}; + // Return a char range ending at the next token start so trailing trivia of + // the matched token is included (useful for fix-it removals). + return CharSourceRange::getCharRange(Tok.getLocation(), + NextTok.getLocation()); + } +} + std::optional<Token> getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h index 38123ae14cff7..bd72a876bf71f 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h @@ -12,6 +12,7 @@ #include "clang/AST/ASTContext.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include <optional> #include <utility> #include <vector> @@ -126,6 +127,13 @@ std::vector<CommentToken> getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts); +/// Returns the first token in \p Range matching \p Pred. +/// The returned char range starts at the matched token and ends at the start +/// of the next token. Returns invalid range if no token matches. +CharSourceRange findTokenInRange(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts, + llvm::function_ref<bool(const Token &)> Pred); + /// Assuming that ``Range`` spans a CVR-qualified type, returns the /// token in ``Range`` that is responsible for the qualification. ``Range`` /// must be valid with respect to ``SM``. Returns ``std::nullopt`` if no diff --git a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp index 438a78b4694ee..aa52424812fe6 100644 --- a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp @@ -8,6 +8,7 @@ #include "../clang-tidy/utils/LexerUtils.h" +#include "clang/AST/DeclCXX.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Frontend/ASTUnit.h" @@ -41,8 +42,213 @@ static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A, return CharSourceRange::getCharRange(Begin, End); } +static bool isRawIdentifierNamed(const Token &Tok, StringRef Name) { + return Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name; +} + +static const CXXConstructorDecl * +findFirstNonImplicitConstructor(const ASTContext &Context) { + for (const Decl *D : Context.getTranslationUnitDecl()->decls()) { + const auto *RD = dyn_cast<CXXRecordDecl>(D); + if (!RD) + continue; + for (const CXXConstructorDecl *Ctor : RD->ctors()) + if (!Ctor->isImplicit()) + return Ctor; + } + return nullptr; +} + namespace { +TEST(LexerUtilsTest, FindTokenInRangeFindsMatch) { + llvm::Annotations Code(R"cpp( +struct S { + $range[[explicit ]] S(int); +}; +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange SearchRange = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + SearchRange, SM, LangOpts, + [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); }); + ASSERT_TRUE(MatchedRange.isValid()); + + const StringRef CodeText = Code.code(); + const size_t ExplicitOffset = CodeText.find("explicit"); + ASSERT_NE(StringRef::npos, ExplicitOffset); + const size_t ConstructorOffset = CodeText.find("S(int)"); + ASSERT_NE(StringRef::npos, ConstructorOffset); + EXPECT_EQ(ExplicitOffset, SM.getFileOffset(MatchedRange.getBegin())); + EXPECT_EQ(ConstructorOffset, SM.getFileOffset(MatchedRange.getEnd())); +} + +TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidWhenNotFound) { + llvm::Annotations Code(R"cpp( +struct S { + $range[[int x = 0;]] + S(int); +}; +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange SearchRange = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + SearchRange, SM, LangOpts, + [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); }); + EXPECT_TRUE(MatchedRange.isInvalid()); +} + +TEST(LexerUtilsTest, FindTokenInRangeDoesNotMatchTokenAtEndBoundary) { + llvm::Annotations Code(R"cpp( +struct S { + $range[[int x = 0; ]]explicit S(int); +}; +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange SearchRange = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + SearchRange, SM, LangOpts, + [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); }); + EXPECT_TRUE(MatchedRange.isInvalid()); +} + +TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidWhenPredicateNeverMatches) { + llvm::Annotations Code(R"cpp( +struct S { + $range[[explicit ]] S(int); +}; +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange SearchRange = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + SearchRange, SM, LangOpts, [](const Token &) { return false; }); + EXPECT_TRUE(MatchedRange.isInvalid()); +} + +TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidForInvalidRange) { + std::unique_ptr<ASTUnit> AST = buildAST("struct S { explicit S(int); };"); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + CharSourceRange(), SM, LangOpts, [](const Token &) { return true; }); + EXPECT_TRUE(MatchedRange.isInvalid()); +} + +TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidForReversedOffsets) { + llvm::Annotations Code(R"cpp( +struct S { + $a^explicit S(int);$b^ +}; +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const SourceLocation MainFileStart = + SM.getLocForStartOfFile(SM.getMainFileID()); + const SourceLocation Begin = MainFileStart.getLocWithOffset(Code.point("b")); + const SourceLocation End = MainFileStart.getLocWithOffset(Code.point("a")); + ASSERT_TRUE(SM.isBeforeInTranslationUnit(End, Begin)); + + const CharSourceRange ReversedRange = + CharSourceRange::getCharRange(Begin, End); + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + ReversedRange, SM, LangOpts, + [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); }); + EXPECT_TRUE(MatchedRange.isInvalid()); +} + +TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidWhenFileRangeIsInvalid) { + llvm::Annotations Code(R"cpp( +#include "header.h" +int $begin^main_var = 0; +)cpp"); + const FileContentMappings Mappings = { + {"header.h", "int header_var = 0;\n"}, + }; + std::unique_ptr<ASTUnit> AST = buildAST(Code.code(), Mappings); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const SourceLocation MainFileStart = + SM.getLocForStartOfFile(SM.getMainFileID()); + const SourceLocation Begin = + MainFileStart.getLocWithOffset(Code.point("begin")); + ASSERT_TRUE(Begin.isFileID()); + + auto HeaderFile = AST->getFileManager().getOptionalFileRef("header.h"); + ASSERT_TRUE(HeaderFile.has_value()); + const FileID HeaderFID = SM.translateFile(*HeaderFile); + ASSERT_TRUE(HeaderFID.isValid()); + const SourceLocation HeaderBegin = SM.getLocForStartOfFile(HeaderFID); + ASSERT_TRUE(HeaderBegin.isFileID()); + + const CharSourceRange SearchRange = + CharSourceRange::getCharRange(Begin, HeaderBegin); + const CharSourceRange FileRange = + Lexer::makeFileCharRange(SearchRange, SM, LangOpts); + EXPECT_TRUE(FileRange.isInvalid()); + + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + SearchRange, SM, LangOpts, [](const Token &) { return true; }); + EXPECT_TRUE(MatchedRange.isInvalid()); +} + +TEST(LexerUtilsTest, FindTokenInRangeReturnsInvalidForMacroRange) { + std::unique_ptr<ASTUnit> AST = buildAST(R"cpp( +#define EXPLICIT explicit +struct S { + EXPLICIT S(int); +}; +)cpp"); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CXXConstructorDecl *Ctor = findFirstNonImplicitConstructor(Context); + ASSERT_NE(nullptr, Ctor); + ASSERT_TRUE(Ctor->getOuterLocStart().isMacroID()); + + const CharSourceRange SearchRange = CharSourceRange::getTokenRange( + Ctor->getOuterLocStart(), Ctor->getEndLoc()); + const CharSourceRange MatchedRange = utils::lexer::findTokenInRange( + SearchRange, SM, LangOpts, + [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); }); + EXPECT_TRUE(MatchedRange.isInvalid()); +} + TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) { llvm::Annotations Code(R"cpp( void f() { `````````` </details> https://github.com/llvm/llvm-project/pull/183941 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
