================ @@ -0,0 +1,748 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ConflictingGlobalAccesses.h" + +#include "clang/AST/RecursiveASTVisitor.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::bugprone { + +namespace { +// An AccesKind represents one access to a global variable. +// +// The unchecked versions represent reads/writes that are not handled by +// -Wunsequenced. (e.g. accesses inside functions). +using AccessKind = uint8_t; +static constexpr AccessKind AkRead = 0; +static constexpr AccessKind AkWrite = 1; +static constexpr AccessKind AkUncheckedRead = 2; +static constexpr AccessKind AkUncheckedWrite = 3; + +static constexpr uint8_t AkCount = 4; + +// The TraversalResultKind represents a set of accesses. +// Bits are corresponding to the AccessKind enum values. +using TraversalResultKind = uint8_t; +static constexpr TraversalResultKind TrInvalid = 0; +static constexpr TraversalResultKind TrRead = 1 << AkRead; +static constexpr TraversalResultKind TrWrite = 1 << AkWrite; +static constexpr TraversalResultKind TrUncheckedWrite = 1 << AkUncheckedWrite; + +// To represent fields in structs or unions we use numbered FieldIndices. The +// FieldIndexArray represents one field inside a global struct/union system. +// The FieldIndexArray can be thought of as a path inside a tree. +using FieldIndex = uint16_t; +static constexpr FieldIndex FiUnion = 0x8000; + +// Note: This bit signals whether the field is a *field of* a struct or a +// union, not whether the type of the field itself is a struct or a union. +using FieldIndexArray = SmallVector<FieldIndex>; + +/// One traversal recurses into one side of a binary expression or one +/// parameter of a function call. At least two of these traversals are used to +/// find conflicting accesses. +/// +/// A TraversalResult represents one traversal. +struct TraversalResult { + int IndexCreated; // We use indices to keep track of which + // traversal we are in currently. The current + // index is stored in GlobalRWVisitor with the + // name TraversalIndex. + SourceLocation Loc[AkCount]; + TraversalResultKind Kind; + + TraversalResult(); + TraversalResult(int Index, SourceLocation Loc, AccessKind Access); + void addNewAccess(SourceLocation Loc, AccessKind Access); +}; + +/// The result of a number of traversals. +class TraversalAggregation { + DeclarationName DeclName; // The name of the global variable being checked. + + // We only store the result of two traversals as two conflicting accesses + // are enough to detect undefined behavior. The two stored TraversalResults + // have different traversal indices. + // + // Note: Sometimes multiple traversals are merged into one + // TraversalResult. + TraversalResult MainPart, OtherPart; + // Pairings that are not reportable: Read-Read, Read-Write, + // Read-UncheckedRead, Write-Write, UncheckedRead-UncheckedRead. + +public: + TraversalAggregation(); + TraversalAggregation(DeclarationName Name, SourceLocation Loc, + AccessKind Access, int Index); + void addGlobalRW(SourceLocation Loc, AccessKind Access, int Index); + DeclarationName getDeclName() const; + + bool isValid() const; + + // If there is a conflict and that conflict isn't reported by -Wunsequenced + // then we report the conflict. + bool shouldBeReported() const; + bool hasConflictingOperations() const; + +private: + bool hasTwoAccesses() const; + bool isReportedByWunsequenced() const; +}; + +/// The ObjectAccessTree stores the TraversalAggregations of one global +/// struct/union. Because each field can be handled as a single variable, the +/// tree stores one TraversalAggregation for every field. +/// +/// Note: structs, classes, and unions are called objects in the code. +struct ObjectAccessTree { + using FieldMap = llvm::DenseMap<int, std::unique_ptr<ObjectAccessTree>>; + TraversalAggregation OwnAccesses; + + // In a union, new fields should inherit from UnionTemporalAccesses + // instead of OwnAccesses. That's because an access to a field of a union is + // also an access to every other field of the same union. + TraversalAggregation UnionTemporalAccesses; + + // We try to be lazy and only store fields that are actually accessed. + FieldMap Fields; + bool IsUnion; + + ObjectAccessTree(TraversalAggregation Own); + + void addFieldToAll(SourceLocation Loc, AccessKind Access, int Index); + void addFieldToAllExcept(uint16_t ExceptIndex, SourceLocation Loc, + AccessKind Access, int Index); +}; + +/// This object is the root of all ObjectAccessTrees. +class ObjectTraversalAggregation { + DeclarationName DeclName; // The name of the global struct/union. + ObjectAccessTree AccessTree; + +public: + ObjectTraversalAggregation(DeclarationName Name, SourceLocation Loc, + FieldIndexArray FieldIndices, AccessKind Access, + int Index); + void addFieldRW(SourceLocation Loc, FieldIndexArray FieldIndices, + AccessKind Access, int Index); + DeclarationName getDeclName() const; + bool shouldBeReported() const; + +private: + bool shouldBeReportedRec(const ObjectAccessTree *Node) const; +}; + +/// GlobalRWVisitor (global read write visitor) does all the traversals. +class GlobalRWVisitor : public RecursiveASTVisitor<GlobalRWVisitor> { + friend RecursiveASTVisitor<GlobalRWVisitor>; + +public: + GlobalRWVisitor(bool IsWritePossibleThroughFunctionParam); + + // startTraversal is called to start a new traversal. It increments the + // TraversalIndex, which in turn will generate new TraversalResults. + void startTraversal(Expr *E); + + const llvm::SmallVector<TraversalAggregation> &getGlobalsFound() const; + + const llvm::SmallVector<ObjectTraversalAggregation> + &getObjectGlobalsFound() const; + +protected: + // RecursiveASTVisitor overrides + bool VisitDeclRefExpr(DeclRefExpr *S); + bool VisitUnaryOperator(UnaryOperator *Op); + bool VisitBinaryOperator(BinaryOperator *Op); + bool VisitCallExpr(CallExpr *CE); + bool VisitMemberExpr(MemberExpr *ME); + +private: + void visitCallExprArgs(CallExpr *CE); + void traverseFunctionsToBeChecked(); + + llvm::SmallVector<TraversalAggregation> GlobalsFound; + llvm::SmallVector<ObjectTraversalAggregation> ObjectGlobalsFound; + + // We check inside functions only if the functions hasn't been checked in + // the current traversal. We use this array to check if the function is + // already registered to be checked. + llvm::SmallVector<std::pair<DeclarationName, Stmt *>> FunctionsToBeChecked; + + // The TraversalIndex is used to differentiate between two sides of a binary + // expression or the parameters of a function. Every traversal represents + // one such expression and the TraversalIndex is incremented between them. + int TraversalIndex; + + // Accesses that are inside functions are not checked by -Wunsequenced, + // therefore we keep track of whether we are inside of a function or not. + bool IsInFunction; + + // Same as the HandleMutableFunctionParametersAsWrites option. + bool IsWritePossibleThroughFunctionParam; + + void addGlobal(DeclarationName Name, SourceLocation Loc, bool IsWrite, + bool IsUnchecked); + void addGlobal(const DeclRefExpr *DR, bool IsWrite, bool IsUnchecked); + void addField(DeclarationName Name, FieldIndexArray FieldIndices, + SourceLocation Loc, bool IsWrite, bool IsUnchecked); + bool handleModified(const Expr *Modified, bool IsUnchecked); + bool handleModifiedVariable(const DeclRefExpr *DE, bool IsUnchecked); + bool handleAccessedObject(const Expr *E, bool IsWrite, bool IsUnchecked); + bool isVariable(const Expr *E); +}; +} // namespace + +static bool isGlobalDecl(const VarDecl *VD) { + return VD && VD->hasGlobalStorage() && VD->getLocation().isValid() && + !VD->getType().isConstQualified(); +} + +AST_MATCHER_P(Expr, twoGlobalWritesBetweenSequencePoints, const LangStandard *, + LangStd) { + assert(LangStd); + + const Expr *E = &Node; + + if (const BinaryOperator *Op = dyn_cast<BinaryOperator>(E)) { + const BinaryOperator::Opcode Code = Op->getOpcode(); + if (Code == BO_LAnd || Code == BO_LOr || Code == BO_Comma) { + return false; + } + + if (Op->isAssignmentOp() && isa<DeclRefExpr>(Op->getLHS())) { + return false; + } + + if (LangStd->isCPlusPlus17() && + (Code == BO_Shl || Code == BO_Shr || Code == BO_PtrMemD || + Code == BO_PtrMemI || Op->isAssignmentOp())) { + return false; + } + + return true; + } + + if (isa<CallExpr>(E)) { + return true; + } + + return false; +} + +ConflictingGlobalAccesses::ConflictingGlobalAccesses(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + HandleMutableFunctionParametersAsWrites( + Options.get("HandleMutableFunctionParametersAsWrites", false)) +{} + +void ConflictingGlobalAccesses::storeOptions(ClangTidyOptions::OptionMap& Opts) +{ + Options.store(Opts, "HandleMutableFunctionParametersAsWrites", + HandleMutableFunctionParametersAsWrites); +} + +void ConflictingGlobalAccesses::registerMatchers(MatchFinder *Finder) { + + const LangStandard *LangStd = + &LangStandard::getLangStandardForKind(getLangOpts().LangStd); + + ast_matchers::internal::Matcher<Expr> GlobalAccessMatcher = + twoGlobalWritesBetweenSequencePoints(LangStd); + + Finder->addMatcher( + stmt(traverse(TK_AsIs, expr(GlobalAccessMatcher).bind("gw"))), this); +} ---------------- PiotrZSL wrote:
this matcher will be executed way to often. twoGlobalWritesBetweenSequencePoints doesnt need to exist, match here call expr and binary operator directly. what about overloaded operators ? https://github.com/llvm/llvm-project/pull/130421 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits