Author: Saleem Abdulrasool Date: 2021-09-14T08:12:10-07:00 New Revision: d0d9e6f0849b2e76e980e2edf365302f47f4e35f
URL: https://github.com/llvm/llvm-project/commit/d0d9e6f0849b2e76e980e2edf365302f47f4e35f DIFF: https://github.com/llvm/llvm-project/commit/d0d9e6f0849b2e76e980e2edf365302f47f4e35f.diff LOG: clang-tidy: introduce readability-containter-data-pointer check This introduces a new check, readability-containter-data-pointer. This check is meant to catch the cases where the user may be trying to materialize the data pointer by taking the address of the 0-th member of a container. With C++11 or newer, the `data` member should be used for this. This provides the following benefits: - `.data()` is easier to read than `&[0]` - it avoids an unnecessary re-materialization of the pointer * this doesn't matter in the case of optimized code, but in the case of unoptimized code, this will be visible - it avoids a potential invalid memory de-reference caused by the indexing when the container is empty (in debug mode, clang will normally optimize away the re-materialization in optimized builds). The small potential behavioural change raises the question of where the check should belong. A reasoning of defense in depth applies here, and this does an unchecked conversion, with the assumption that users can use the static analyzer to catch cases where we can statically identify an invalid memory de-reference. For the cases where the static analysis is unable to prove the size of the container, UBSan can be used to track the invalid access. Special thanks to Aaron Ballmann for the discussion on whether this check would be useful and where to place it. This also partially resolves PR26817! Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D108893 Added: clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h clang-tools-extra/docs/clang-tidy/checks/readability-data-pointer.rst clang-tools-extra/test/clang-tidy/checkers/readability-container-data-pointer.cpp Modified: clang-tools-extra/clang-tidy/readability/CMakeLists.txt clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp clang-tools-extra/docs/ReleaseNotes.rst Removed: ################################################################################ diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index 78256d6f73251..eba0ab98cb37a 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -7,6 +7,7 @@ add_clang_library(clangTidyReadabilityModule AvoidConstParamsInDecls.cpp BracesAroundStatementsCheck.cpp ConstReturnTypeCheck.cpp + ContainerDataPointerCheck.cpp ContainerSizeEmptyCheck.cpp ConvertMemberFunctionsToStatic.cpp DeleteNullPointerCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp new file mode 100644 index 0000000000000..3a670509ec2e2 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp @@ -0,0 +1,117 @@ +//===--- ContainerDataPointerCheck.cpp - clang-tidy -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ContainerDataPointerCheck.h" + +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/StringRef.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace readability { +ContainerDataPointerCheck::ContainerDataPointerCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + +void ContainerDataPointerCheck::registerMatchers(MatchFinder *Finder) { + const auto Record = + cxxRecordDecl( + isSameOrDerivedFrom( + namedDecl( + has(cxxMethodDecl(isPublic(), hasName("data")).bind("data"))) + .bind("container"))) + .bind("record"); + + const auto NonTemplateContainerType = + qualType(hasUnqualifiedDesugaredType(recordType(hasDeclaration(Record)))); + const auto TemplateContainerType = + qualType(hasUnqualifiedDesugaredType(templateSpecializationType( + hasDeclaration(classTemplateDecl(has(Record)))))); + + const auto Container = + qualType(anyOf(NonTemplateContainerType, TemplateContainerType)); + + Finder->addMatcher( + unaryOperator( + unless(isExpansionInSystemHeader()), hasOperatorName("&"), + hasUnaryOperand(anyOf( + ignoringParenImpCasts( + cxxOperatorCallExpr( + callee(cxxMethodDecl(hasName("operator[]")) + .bind("operator[]")), + argumentCountIs(2), + hasArgument( + 0, + anyOf(ignoringParenImpCasts( + declRefExpr( + to(varDecl(anyOf( + hasType(Container), + hasType(references(Container)))))) + .bind("var")), + ignoringParenImpCasts(hasDescendant( + declRefExpr( + to(varDecl(anyOf( + hasType(Container), + hasType(pointsTo(Container)), + hasType(references(Container)))))) + .bind("var"))))), + hasArgument(1, + ignoringParenImpCasts( + integerLiteral(equals(0)).bind("zero")))) + .bind("operator-call")), + ignoringParenImpCasts( + cxxMemberCallExpr( + hasDescendant( + declRefExpr(to(varDecl(anyOf( + hasType(Container), + hasType(references(Container)))))) + .bind("var")), + argumentCountIs(1), + hasArgument(0, + ignoringParenImpCasts( + integerLiteral(equals(0)).bind("zero")))) + .bind("member-call")), + ignoringParenImpCasts( + arraySubscriptExpr( + hasLHS(ignoringParenImpCasts( + declRefExpr(to(varDecl(anyOf( + hasType(Container), + hasType(references(Container)))))) + .bind("var"))), + hasRHS(ignoringParenImpCasts( + integerLiteral(equals(0)).bind("zero")))) + .bind("array-subscript"))))) + .bind("address-of"), + this); +} + +void ContainerDataPointerCheck::check(const MatchFinder::MatchResult &Result) { + const auto *UO = Result.Nodes.getNodeAs<UnaryOperator>("address-of"); + const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("var"); + + std::string ReplacementText; + ReplacementText = std::string(Lexer::getSourceText( + CharSourceRange::getTokenRange(DRE->getSourceRange()), + *Result.SourceManager, getLangOpts())); + if (DRE->getType()->isPointerType()) + ReplacementText += "->data()"; + else + ReplacementText += ".data()"; + + FixItHint Hint = + FixItHint::CreateReplacement(UO->getSourceRange(), ReplacementText); + diag(UO->getBeginLoc(), + "'data' should be used for accessing the data pointer instead of taking " + "the address of the 0-th element") + << Hint; +} +} // namespace readability +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h new file mode 100644 index 0000000000000..0f0f8233f5d84 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h @@ -0,0 +1,45 @@ +//===--- ContainerDataPointerCheck.h - clang-tidy ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERDATAPOINTERCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERDATAPOINTERCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace readability { +/// Checks whether a call to `operator[]` and `&` can be replaced with a call to +/// `data()`. +/// +/// This only replaces the case where the offset being accessed through the +/// subscript operation is a known constant 0. This avoids a potential invalid +/// memory access when the container is empty. Cases where the constant is not +/// explictly zero can be addressed through the clang static analyzer, and those +/// which cannot be statically identified can be caught using UBSan. +class ContainerDataPointerCheck : public ClangTidyCheck { +public: + ContainerDataPointerCheck(StringRef Name, ClangTidyContext *Context); + + bool isLanguageVersionSupported(const LangOptions &LO) const override { + return LO.CPlusPlus11; + } + + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + + llvm::Optional<TraversalKind> getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } +}; +} // namespace readability +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERDATAPOINTERCHECK_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index 366541a0ed487..2d6540283ded5 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -12,6 +12,7 @@ #include "AvoidConstParamsInDecls.h" #include "BracesAroundStatementsCheck.h" #include "ConstReturnTypeCheck.h" +#include "ContainerDataPointerCheck.h" #include "ContainerSizeEmptyCheck.h" #include "ConvertMemberFunctionsToStatic.h" #include "DeleteNullPointerCheck.h" @@ -62,6 +63,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-braces-around-statements"); CheckFactories.registerCheck<ConstReturnTypeCheck>( "readability-const-return-type"); + CheckFactories.registerCheck<ContainerDataPointerCheck>( + "readability-container-data-pointer"); CheckFactories.registerCheck<ContainerSizeEmptyCheck>( "readability-container-size-empty"); CheckFactories.registerCheck<ConvertMemberFunctionsToStatic>( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 609064f7610b0..1b1f00de4d69c 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -91,6 +91,11 @@ New checks variables and function parameters only. +- New :doc:`readability-data-pointer <clang-tidy/checks/readability-data-pointer` check. + + Finds cases where code could use ``data()`` rather than the address of the + element at index 0 in a container. + New check aliases ^^^^^^^^^^^^^^^^^ diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-data-pointer.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-data-pointer.rst new file mode 100644 index 0000000000000..46febd26496d6 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-data-pointer.rst @@ -0,0 +1,13 @@ +.. title:: clang-tidy - readability-data-pointer + +readability-data-pointer +======================== + +Finds cases where code could use ``data()`` rather than the address of the +element at index 0 in a container. This pattern is commonly used to materialize +a pointer to the backing data of a container. ``std::vector`` and +``std::string`` provide a ``data()`` accessor to retrieve the data pointer which +should be preferred. + +This also ensures that in the case that the container is empty, the data pointer +access does not perform an errant memory access. diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-container-data-pointer.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-container-data-pointer.cpp new file mode 100644 index 0000000000000..0c1208c85b6b4 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-container-data-pointer.cpp @@ -0,0 +1,110 @@ +// RUN: %check_clang_tidy %s readability-container-data-pointer %t + +typedef __SIZE_TYPE__ size_t; + +namespace std { +template <typename T> +struct vector { + using size_type = size_t; + + vector(); + explicit vector(size_type); + + T *data(); + const T *data() const; + + T &operator[](size_type); + const T &operator[](size_type) const; +}; + +template <typename T> +struct basic_string { + using size_type = size_t; + + basic_string(); + + T *data(); + const T *data() const; + + T &operator[](size_t); + const T &operator[](size_type) const; +}; + +typedef basic_string<char> string; +typedef basic_string<wchar_t> wstring; + +template <typename T> +struct is_integral; + +template <> +struct is_integral<size_t> { + static const bool value = true; +}; + +template <bool, typename T = void> +struct enable_if { }; + +template <typename T> +struct enable_if<true, T> { + typedef T type; +}; +} + +template <typename T> +void f(const T *); + +#define z (0) + +void g(size_t s) { + std::vector<unsigned char> b(s); + f(&((b)[(z)])); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] + // CHECK-FIXES: {{^ }}f(b.data());{{$}} +} + +void h() { + std::string s; + f(&((s).operator[]((z)))); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] + // CHECK-FIXES: {{^ }}f(s.data());{{$}} + + std::wstring w; + f(&((&(w))->operator[]((z)))); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] + // CHECK-FIXES: {{^ }}f(w.data());{{$}} +} + +template <typename T, typename U, + typename = typename std::enable_if<std::is_integral<U>::value>::type> +void i(U s) { + std::vector<T> b(s); + f(&b[0]); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] + // CHECK-FIXES: {{^ }}f(b.data());{{$}} +} + +template void i<unsigned char, size_t>(size_t); + +void j(std::vector<unsigned char> * const v) { + f(&(*v)[0]); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] + // CHECK-FIXES: {{^ }}f(v->data());{{$}} +} + +void k(const std::vector<unsigned char> &v) { + f(&v[0]); + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] + // CHECK-FIXES: {{^ }}f(v.data());{{$}} +} + +void l() { + unsigned char b[32]; + f(&b[0]); + // CHECK-MESSAGES-NOT: :[[@LINE-1]]:5: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] +} + +template <typename T> +void m(const std::vector<T> &v) { + const T *p = &v[0]; + // CHECK-MESSAGES-NOT: :[[@LINE-1]]:16: warning: 'data' should be used for accessing the data pointer instead of taking the address of the 0-th element [readability-container-data-pointer] +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits