llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-analysis Author: Utkarsh Saxena (usx95) <details> <summary>Changes</summary> --- Patch is 45.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147208.diff 7 Files Affected: - (added) clang/include/clang/Analysis/Analyses/LifetimeSafety.h (+13) - (modified) clang/include/clang/Basic/DiagnosticGroups.td (+3) - (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+4) - (modified) clang/lib/Analysis/CMakeLists.txt (+1) - (added) clang/lib/Analysis/LifetimeSafety.cpp (+761) - (modified) clang/lib/Sema/AnalysisBasedWarnings.cpp (+10) - (added) clang/test/Sema/warn-lifetime-safety-dataflow.cpp (+377) ``````````diff diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h new file mode 100644 index 0000000000000..daf24fff72b9b --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -0,0 +1,13 @@ +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#include "clang/AST/DeclBase.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +namespace clang { + +void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, + AnalysisDeclContext &AC); + +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 36fa3227fd6a6..e474562685e3a 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -532,6 +532,9 @@ def Dangling : DiagGroup<"dangling", [DanglingAssignment, DanglingInitializerList, DanglingGsl, ReturnStackAddress]>; + +def LifetimeSafety : DiagGroup<"experimental-lifetime-safety">; + def DistributedObjectModifiers : DiagGroup<"distributed-object-modifiers">; def DllexportExplicitInstantiationDecl : DiagGroup<"dllexport-explicit-instantiation-decl">; def ExcessInitializers : DiagGroup<"excess-initializers">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 968edd967e0c5..17dab9748d296 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10622,6 +10622,10 @@ def warn_dangling_reference_captured_by_unknown : Warning< "object whose reference is captured will be destroyed at the end of " "the full-expression">, InGroup<DanglingCapture>; +def warn_experimental_lifetime_safety_dummy_warning : Warning< + "todo: remove this warning after we have atleast one warning based on the lifetime analysis">, + InGroup<LifetimeSafety>, DefaultIgnore; + // For non-floating point, expressions of the form x == x or x != x // should result in a warning, since these always evaluate to a constant. // Array comparisons have similar warnings diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 8cd3990db4c3e..0523d92480cb3 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,6 +21,7 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp + LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp new file mode 100644 index 0000000000000..7870352f0287a --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -0,0 +1,761 @@ +#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TimeProfiler.h" +#include <cstdint> + +namespace clang { +namespace { + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +struct AccessPath { + const clang::ValueDecl *D; + + enum class Kind : uint8_t { + StackVariable, + Temporary, // TODO: Handle. + Field, // TODO: Handle like `s.y`. + Heap, // TODO: Handle. + ArrayElement, // TODO: Handle. + Static, // TODO: Handle. + }; + + Kind PathKind; + + AccessPath(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {} +}; + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template <typename Tag> struct ID { + uint32_t Value = 0; + + bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; } + bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); } + bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; } + ID<Tag> operator++(int) { + ID<Tag> Tmp = *this; + ++Value; + return Tmp; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } +}; + +template <typename Tag> +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) { + return OS << ID.Value; +} + +using LoanID = ID<struct LoanTag>; +using OriginID = ID<struct OriginTag>; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is taken. +struct Loan { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it + /// is represented as empty LoanSet + LoanID ID; + AccessPath Path; + SourceLocation IssueLoc; + + Loan(LoanID id, AccessPath path, SourceLocation loc) + : ID(id), Path(path), IssueLoc(loc) {} +}; + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Enhance the origin model to handle complex types, pointer +/// indirection and reborrowing. The plan is to move from a single origin per +/// variable/expression to a "list of origins" governed by the Type. +/// For example, the type 'int**' would have two origins. +/// See discussion: +/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238 +struct Origin { + OriginID ID; + llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast<const clang::ValueDecl *>(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast<const clang::Expr *>(); + } +}; + +class LoanManager { +public: + LoanManager() = default; + + Loan &addLoan(AccessPath Path, SourceLocation Loc) { + AllLoans.emplace_back(getNextLoanID(), Path, Loc); + return AllLoans.back(); + } + + const Loan &getLoan(LoanID ID) const { + assert(ID.Value < AllLoans.size()); + return AllLoans[ID.Value]; + } + llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } + +private: + LoanID getNextLoanID() { return NextLoanID++; } + + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector<Loan> AllLoans; +}; + +class OriginManager { +public: + OriginManager() = default; + + Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) { + AllOrigins.emplace_back(ID, &D); + return AllOrigins.back(); + } + Origin &addOrigin(OriginID ID, const clang::Expr &E) { + AllOrigins.emplace_back(ID, &E); + return AllOrigins.back(); + } + + OriginID get(const Expr &E) { + // Origin of DeclRefExpr is that of the declaration it refers to. + if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) { + return get(*DRE->getDecl()); + } + auto It = ExprToOriginID.find(&E); + // TODO: This should be an assert(It != ExprToOriginID.end()). The current + // implementation falls back to getOrCreate to avoid crashing on + // yet-unhandled pointer expressions, creating an empty origin for them. + if (It == ExprToOriginID.end()) + return getOrCreate(E); + + return It->second; + } + + OriginID get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + assert(It != DeclToOriginID.end()); + return It->second; + } + + OriginID getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); + if (It != ExprToOriginID.end()) + return It->second; + + if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) { + // Origin of DeclRefExpr is that of the declaration it refers to. + return getOrCreate(*DRE->getDecl()); + } + OriginID NewID = getNextOriginID(); + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; + return NewID; + } + + const Origin &getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; + } + + llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; + return NewID; + } + +private: + OriginID getNextOriginID() { return NextOriginID++; } + + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector<Origin> AllOrigins; + llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; + llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; +}; + +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + AssignOrigin, + /// An origin is part of a function's return value. + ReturnOfOrigin + }; + +private: + Kind K; + +protected: + Fact(Kind K) : K(K) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + + template <typename T> const T *getAs() const { + if (T::classof(this)) + return static_cast<const T *>(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS) const { + OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; + } +}; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS) const override { + OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() + << ")\n"; + } +}; + +class ExpireFact : public Fact { + LoanID LID; + +public: + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {} + LoanID getLoanID() const { return LID; } + void dump(llvm::raw_ostream &OS) const override { + OS << "Expire (LoanID: " << getLoanID() << ")\n"; + } +}; + +class AssignOriginFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::AssignOrigin; + } + + AssignOriginFact(OriginID OIDDest, OriginID OIDSrc) + : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + void dump(llvm::raw_ostream &OS) const override { + OS << "AssignOrigin (DestID: " << getDestOriginID() + << ", SrcID: " << getSrcOriginID() << ")\n"; + } +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + + ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + void dump(llvm::raw_ostream &OS) const override { + OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n"; + } +}; + +class FactManager { +public: + llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { + if (!NewFacts.empty()) { + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } + } + + template <typename FactType, typename... Args> + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate<FactType>(); + return new (Mem) FactType(std::forward<Args>(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) { + if (const auto *ND = dyn_cast<NamedDecl>(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + } + // Print blocks in the order as they appear in code for a stable ordering. + for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs()); + } + } + llvm::dbgs() << " End of Block\n"; + } + } + + LoanManager &getLoanMgr() { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + +private: + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; + +class FactGenerator : public ConstStmtVisitor<FactGenerator> { + +public: + FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), AC(AC) {} + + void run() { + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that + // initializations and destructions are processed in the correct sequence. + for (const CFGBlock *Block : *AC.getAnalysis<PostOrderCFGView>()) { + CurrentBlockFacts.clear(); + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) + Visit(CS->getStmt()); + else if (std::optional<CFGAutomaticObjDtor> DtorOpt = + Element.getAs<CFGAutomaticObjDtor>()) + handleDestructor(*DtorOpt); + } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); + } + } + + void VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast<VarDecl>(D)) + if (hasOrigin(VD->getType())) + if (const Expr *InitExpr = VD->getInit()) + addAssignOriginFact(*VD, *InitExpr); + } + + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE->getType())) + return; + Visit(ICE->getSubExpr()); + /// TODO: Consider if this is actually useful in practice. Alternatively, we + /// could directly use the sub-expression's OriginID instead of creating a + /// new one. + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + addAssignOriginFact(*ICE, *ICE->getSubExpr()); + } + + void VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + // Check if it's a local variable. + if (VD->hasLocalStorage()) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); + AccessPath AddrOfLocalVarPath(VD, AccessPath::Kind::StackVariable); + Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, + UO->getOperatorLoc()); + CurrentBlockFacts.push_back( + FactMgr.createFact<IssueFact>(L.ID, OID)); + } + } + } + } + } + + void VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr->getType())) { + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back( + FactMgr.createFact<ReturnOfOriginFact>(OID)); + } + } + } + + void VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) { + const Expr *LHSExpr = BO->getLHS(); + const Expr *RHSExpr = BO->getRHS(); + + // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` + // LHS must be a pointer/reference type that can be an origin. + // RHS must also represent an origin (either another pointer/ref or an + // address-of). + if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) + if (const auto *VD_LHS = + dyn_cast<ValueDecl>(DRE_LHS->getDecl()->getCanonicalDecl()); + VD_LHS && hasOrigin(VD_LHS->getType())) + addAssignOriginFact(*VD_LHS, *RHSExpr); + } + } + +private: + // Check if a type have an origin. + bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } + + template <typename Destination, typename Source> + void addAssignOriginFact(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID)); + } + + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + /// variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.PathKind == AccessPath::Kind::StackVariable) { + if (LoanPath.D == DestructedVD) { + CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID)); + } + } + } + } + + FactManager &FactMgr; + AnalysisDeclContext &AC; + llvm::SmallVector<Fact *> CurrentBlockFacts; +}; + +// ========================================================================= // +// The Dataflow Lattice +// ========================================================================= // + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +using LoanSet = llvm::ImmutableSet<LoanID>; +using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; + +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + OriginLoanMap::Factory OriginMapFact; + LoanSet::Factory LoanSetFact; + + LoanSet createLoanSet(LoanID LID) { + return LoanSetFact.add(LoanSetFact.getEmptySet(), LID); + } +}; + +/// LifetimeLattice represents the state of our analysis at a given program +/// point. It is an immutabl... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/147208 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits