https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/142313
>From 0cd187b01e61b200d92ca0b640789c1586075142 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 2 Jun 2025 17:53:14 +0000 Subject: [PATCH 1/2] Introduce Intra-procedural lifetime analysis in Clang --- .../clang/Analysis/Analyses/LifetimeSafety.h | 13 + clang/lib/Analysis/CMakeLists.txt | 1 + clang/lib/Analysis/LifetimeSafety.cpp | 728 ++++++++++++++++++ clang/lib/Sema/AnalysisBasedWarnings.cpp | 8 + .../Sema/warn-lifetime-safety-dataflow.cpp | 362 +++++++++ 5 files changed, 1112 insertions(+) create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety.h create mode 100644 clang/lib/Analysis/LifetimeSafety.cpp create mode 100644 clang/test/Sema/warn-lifetime-safety-dataflow.cpp diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h new file mode 100644 index 0000000000000..daf24fff72b9b --- /dev/null +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -0,0 +1,13 @@ +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#include "clang/AST/DeclBase.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +namespace clang { + +void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, + AnalysisDeclContext &AC); + +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt index 8cd3990db4c3e..0523d92480cb3 100644 --- a/clang/lib/Analysis/CMakeLists.txt +++ b/clang/lib/Analysis/CMakeLists.txt @@ -21,6 +21,7 @@ add_clang_library(clangAnalysis FixitUtil.cpp IntervalPartition.cpp IssueHash.cpp + LifetimeSafety.cpp LiveVariables.cpp MacroExpansionContext.cpp ObjCNoReturn.cpp diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp new file mode 100644 index 0000000000000..8dbb4dc37026c --- /dev/null +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -0,0 +1,728 @@ +#include "clang/Analysis/Analyses/LifetimeSafety.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Type.h" +#include "clang/Analysis/AnalysisDeclContext.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/ADT/ImmutableMap.h" +#include "llvm/ADT/ImmutableSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TimeProfiler.h" +#include <vector> + +namespace clang { +namespace { + +struct Point { + const clang::CFGBlock *Block; + /// Index into Block->Elements(). + unsigned ElementIndex; + + Point(const clang::CFGBlock *B = nullptr, unsigned Idx = 0) + : Block(B), ElementIndex(Idx) {} + + bool operator==(const Point &Other) const { + return Block == Other.Block && ElementIndex == Other.ElementIndex; + } +}; + +/// Represents the storage location being borrowed, e.g., a specific stack +/// variable. +/// TODO: Handle member accesseslike `s.y`. +struct Path { + const clang::ValueDecl *D; + + enum class Kind : uint8_t { + StackVariable, + Heap, // TODO: Handle. + Field, // TODO: Handle. + ArrayElement, // TODO: Handle. + TemporaryObject, // TODO: Handle. + StaticOrGlobal, // TODO: Handle. + }; + + Kind PathKind; + + Path(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {} +}; + +using LoanID = uint32_t; +using OriginID = uint32_t; + +/// Information about a single borrow, or "Loan". A loan is created when a +/// reference or pointer is taken. +struct LoanInfo { + /// TODO: Represent opaque loans. + /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it + /// is represented as empty LoanSet + LoanID ID; + Path SourcePath; + SourceLocation IssueLoc; + + LoanInfo(LoanID id, Path path, SourceLocation loc) + : ID(id), SourcePath(path), IssueLoc(loc) {} +}; + +enum class OriginKind : uint8_t { Variable, ExpressionResult }; + +/// An Origin is a symbolic identifier that represents the set of possible +/// loans a pointer-like object could hold at any given time. +/// TODO: Also represent Origins of complex types (fields, inner types). +struct OriginInfo { + OriginID ID; + OriginKind Kind; + union { + const clang::ValueDecl *Decl; + const clang::Expr *Expression; + }; + OriginInfo(OriginID id, OriginKind kind, const clang::ValueDecl *D) + : ID(id), Kind(kind), Decl(D) {} + OriginInfo(OriginID id, OriginKind kind, const clang::Expr *E) + : ID(id), Kind(kind), Expression(E) {} +}; + +class LoanManager { +public: + LoanManager() = default; + + LoanInfo &addLoanInfo(Path path, SourceLocation loc) { + NextLoanIDVal++; + AllLoans.emplace_back(NextLoanIDVal, path, loc); + return AllLoans.back(); + } + + const LoanInfo *getLoanInfo(LoanID id) const { + if (id < AllLoans.size()) + return &AllLoans[id]; + return nullptr; + } + llvm::ArrayRef<LoanInfo> getLoanInfos() const { return AllLoans; } + +private: + LoanID NextLoanIDVal = 0; + llvm::SmallVector<LoanInfo> AllLoans; +}; + +class OriginManager { +public: + OriginManager() = default; + + OriginID getNextOriginID() { return NextOriginIDVal++; } + OriginInfo &addOriginInfo(OriginID id, const clang::ValueDecl *D) { + assert(D != nullptr); + AllOrigins.emplace_back(id, OriginKind::Variable, D); + return AllOrigins.back(); + } + OriginInfo &addOriginInfo(OriginID id, const clang::Expr *E) { + assert(E != nullptr); + AllOrigins.emplace_back(id, OriginKind::ExpressionResult, E); + return AllOrigins.back(); + } + + OriginID getOrCreate(const Expr *E) { + auto It = ExprToOriginID.find(E); + if (It != ExprToOriginID.end()) + return It->second; + + if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) { + // Origin of DeclRefExpr is that of the declaration it refers to. + return getOrCreate(DRE->getDecl()); + } + OriginID NewID = getNextOriginID(); + addOriginInfo(NewID, E); + ExprToOriginID[E] = NewID; + return NewID; + } + + const OriginInfo *getOriginInfo(OriginID id) const { + if (id < AllOrigins.size()) + return &AllOrigins[id]; + return nullptr; + } + + llvm::ArrayRef<OriginInfo> getOriginInfos() const { return AllOrigins; } + + OriginID getOrCreate(const ValueDecl *D) { + auto It = DeclToOriginID.find(D); + if (It != DeclToOriginID.end()) + return It->second; + OriginID NewID = getNextOriginID(); + addOriginInfo(NewID, D); + DeclToOriginID[D] = NewID; + return NewID; + } + +private: + OriginID NextOriginIDVal = 0; + llvm::SmallVector<OriginInfo> AllOrigins; + llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; + llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; +}; + +/// An abstract base class for a single, atomic lifetime-relevant event. +class Fact { + +public: + enum class Kind : uint8_t { + /// A new loan is issued from a borrow expression (e.g., &x). + Issue, + /// A loan expires as its underlying storage is freed (e.g., variable goes + /// out of scope). + Expire, + /// An origin is propagated from a source to a destination (e.g., p = q). + AssignOrigin, + /// An origin is part of a function's return value. + ReturnOfOrigin + }; + +private: + Kind K; + +protected: + Point P; + Fact(Kind K, Point Pt) : K(K), P(Pt) {} + +public: + virtual ~Fact() = default; + Kind getKind() const { return K; } + Point getPoint() const { return P; } + + template <typename T> const T *getAs() const { + if (T::classof(this)) + return static_cast<const T *>(this); + return nullptr; + } + + virtual void dump(llvm::raw_ostream &OS) const { + OS << "Fact (Kind: " << static_cast<int>(K) << ", Point: B" + << P.Block->getBlockID() << ":" << P.ElementIndex << ")\n"; + } +}; + +class IssueFact : public Fact { + LoanID LID; + OriginID OID; + +public: + IssueFact(LoanID LID, OriginID OID, Point Pt) + : Fact(Kind::Issue, Pt), LID(LID), OID(OID) {} + LoanID getLoanID() const { return LID; } + OriginID getOriginID() const { return OID; } + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + void dump(llvm::raw_ostream &OS) const override { + OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() + << ")\n"; + } +}; + +class ExpireFact : public Fact { + LoanID LID; + +public: + ExpireFact(LoanID LID, Point Pt) : Fact(Kind::Expire, Pt), LID(LID) {} + LoanID getLoanID() const { return LID; } + static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + void dump(llvm::raw_ostream &OS) const override { + OS << "Expire (LoanID: " << getLoanID() << ")\n"; + } +}; + +class AssignOriginFact : public Fact { + OriginID OIDDest; + OriginID OIDSrc; + +public: + AssignOriginFact(OriginID OIDDest, OriginID OIDSrc, Point Pt) + : Fact(Kind::AssignOrigin, Pt), OIDDest(OIDDest), OIDSrc(OIDSrc) {} + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } + static bool classof(const Fact *F) { + return F->getKind() == Kind::AssignOrigin; + } + void dump(llvm::raw_ostream &OS) const override { + OS << "AssignOrigin (DestID: " << getDestOriginID() + << ", SrcID: " << getSrcOriginID() << ")\n"; + } +}; + +class ReturnOfOriginFact : public Fact { + OriginID OID; + +public: + ReturnOfOriginFact(OriginID OID, Point Pt) + : Fact(Kind::ReturnOfOrigin, Pt), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } + static bool classof(const Fact *F) { + return F->getKind() == Kind::ReturnOfOrigin; + } + void dump(llvm::raw_ostream &OS) const override { + OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n"; + } +}; + +class FactManager { +public: + llvm::ArrayRef<Fact *> getFacts(const CFGBlock *B) const { + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) + return It->second; + return {}; + } + + void addFact(const CFGBlock *B, Fact *NewFact) { + BlockToFactsMap[B].push_back(NewFact); + } + + template <typename FactType, typename... Args> + FactType *createFact(Args &&...args) { + void *Mem = FactAllocator.Allocate<FactType>(); + return new (Mem) FactType(std::forward<Args>(args)...); + } + + void dump(const CFG &Cfg, AnalysisDeclContext &AC) const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Lifetime Analysis Facts:\n"; + llvm::dbgs() << "==========================================\n"; + if (const Decl *D = AC.getDecl()) { + if (const auto *ND = dyn_cast<NamedDecl>(D)) + llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; + } + // Print blocks in the order as they appear in code for a stable ordering. + ForwardDataflowWorklist worklist(Cfg, AC); + for (const CFGBlock *B : Cfg.const_nodes()) + worklist.enqueueBlock(B); + while (const CFGBlock *B = worklist.dequeue()) { + llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; + auto It = BlockToFactsMap.find(B); + if (It != BlockToFactsMap.end()) { + for (const Fact *F : It->second) { + llvm::dbgs() << " "; + F->dump(llvm::dbgs()); + } + } + llvm::dbgs() << " End of Block\n"; + } + } + +private: + llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<Fact *>> + BlockToFactsMap; + llvm::BumpPtrAllocator FactAllocator; +}; + +struct FactsContext { + FactManager Facts; + LoanManager Loans; + OriginManager Origins; +}; + +class FactGenerator : public ConstStmtVisitor<FactGenerator> { + +public: + FactGenerator(const CFG &Cfg, FactsContext &FactsCtx, AnalysisDeclContext &AC) + : FactsCtx(FactsCtx), Cfg(Cfg), AC(AC), CurrentBlock(nullptr) {} + + void run() { + llvm::TimeTraceScope TimeProfile("Fact Generation"); + // Iterate through the CFG blocks in pre-order to ensure that + // initializations and destructions are processed in the correct sequence. + // TODO: A pre-order traversal utility should be provided by Dataflow + // framework. + ForwardDataflowWorklist Worklist(Cfg, AC); + for (const CFGBlock *B : Cfg.const_nodes()) + Worklist.enqueueBlock(B); + while (const CFGBlock *Block = Worklist.dequeue()) { + CurrentBlock = Block; + for (unsigned I = 0; I < Block->size(); ++I) { + const CFGElement &Element = Block->Elements[I]; + CurrentPoint = Point(Block, I); + if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) + Visit(CS->getStmt()); + else if (std::optional<CFGAutomaticObjDtor> DtorOpt = + Element.getAs<CFGAutomaticObjDtor>()) + handleDestructor(*DtorOpt); + } + } + } + + void VisitDeclStmt(const DeclStmt *DS) { + for (const Decl *D : DS->decls()) { + if (const auto *VD = dyn_cast<VarDecl>(D)) { + if (hasOrigin(VD->getType())) { + if (const Expr *InitExpr = VD->getInit()) { + OriginID DestOID = FactsCtx.Origins.getOrCreate(VD); + OriginID SrcOID = FactsCtx.Origins.getOrCreate(InitExpr); + FactsCtx.Facts.addFact(CurrentBlock, + FactsCtx.Facts.createFact<AssignOriginFact>( + DestOID, SrcOID, CurrentPoint)); + } + } + } + } + } + + void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { + if (!hasOrigin(ICE->getType())) + return; + // An ImplicitCastExpr node itself gets an origin, which flows from the + // origin of its sub-expression (after stripping its own parens/casts). + if (ICE->getCastKind() == CK_LValueToRValue) { + OriginID IceOID = FactsCtx.Origins.getOrCreate(ICE); + OriginID SubExprOID = FactsCtx.Origins.getOrCreate(ICE->getSubExpr()); + FactsCtx.Facts.addFact(CurrentBlock, + FactsCtx.Facts.createFact<AssignOriginFact>( + IceOID, SubExprOID, CurrentPoint)); + } + } + + void VisitUnaryOperator(const UnaryOperator *UO) { + if (UO->getOpcode() == UO_AddrOf) { + const Expr *SubExpr = UO->getSubExpr(); + if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) { + if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + // Check if it's a local variable. + if (VD->hasLocalStorage()) { + OriginID OID = FactsCtx.Origins.getOrCreate(UO); + Path AddrOfLocalVarPath(VD, Path::Kind::StackVariable); + LoanInfo &Loan = FactsCtx.Loans.addLoanInfo(AddrOfLocalVarPath, + UO->getOperatorLoc()); + FactsCtx.Facts.addFact(CurrentBlock, + FactsCtx.Facts.createFact<IssueFact>( + Loan.ID, OID, CurrentPoint)); + } + } + } + } + } + + void VisitReturnStmt(const ReturnStmt *RS) { + if (const Expr *RetExpr = RS->getRetValue()) { + if (hasOrigin(RetExpr->getType())) { + OriginID OID = FactsCtx.Origins.getOrCreate(RetExpr); + FactsCtx.Facts.addFact( + CurrentBlock, + FactsCtx.Facts.createFact<ReturnOfOriginFact>(OID, CurrentPoint)); + } + } + } + + void VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) { + const Expr *LHSExpr = BO->getLHS(); + const Expr *RHSExpr = BO->getRHS(); + + // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var` + // LHS must be a pointer/reference type that can be an origin. + // RHS must also represent an origin (either another pointer/ref or an + // address-of). + if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) { + if (const auto *VD_LHS = + dyn_cast<ValueDecl>(DRE_LHS->getDecl()->getCanonicalDecl()); + VD_LHS && hasOrigin(VD_LHS->getType())) { + OriginID DestOID = FactsCtx.Origins.getOrCreate(VD_LHS); + OriginID SrcOID = FactsCtx.Origins.getOrCreate(RHSExpr); + FactsCtx.Facts.addFact(CurrentBlock, + FactsCtx.Facts.createFact<AssignOriginFact>( + DestOID, SrcOID, CurrentPoint)); + } + } + } + } + +private: + // Check if a type have an origin. + bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } + + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { + /// TODO: Also handle trivial destructors (e.g., for `int` + // variables) which will never have a CFGAutomaticObjDtor node. + /// TODO: Handle loans to temporaries. + const VarDecl *DestructedVD = DtorOpt.getVarDecl(); + if (!DestructedVD) + return; + // Iterate through all loans to see if any expire. + for (const LoanInfo &Loan : FactsCtx.Loans.getLoanInfos()) { + const Path &LoanPath = Loan.SourcePath; + // Check if the loan is for a stack variable and if that variable + // is the one being destructed. + if (LoanPath.PathKind == Path::Kind::StackVariable) { + if (LoanPath.D == DestructedVD) { + FactsCtx.Facts.addFact( + CurrentBlock, + FactsCtx.Facts.createFact<ExpireFact>(Loan.ID, CurrentPoint)); + } + } + } + } + + FactsContext &FactsCtx; + const CFG &Cfg; + AnalysisDeclContext &AC; + const CFGBlock *CurrentBlock; + Point CurrentPoint; +}; + +// ========================================================================= // +// The Dataflow Lattice +// ========================================================================= // + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +using LoanSet = llvm::ImmutableSet<LoanID>; +using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; + +/// A context object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + OriginLoanMap::Factory OriginMapFact; + LoanSet::Factory LoanSetFact; + + LoanSet createLoanSet(LoanID LID) { + return LoanSetFact.add(LoanSetFact.getEmptySet(), LID); + } +}; + +/// LifetimeLattice represents the state of our analysis at a given program +/// point. It is an immutable object, and all operations produce a new +/// instance rather than modifying the existing one. +struct LifetimeLattice { + /// The map from an origin to the set of loans it contains. + /// TODO(opt): To reduce the lattice size, propagate origins of declarations, + /// not expressions, because expressions are not visible across blocks. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {} + LifetimeLattice() = default; + + bool operator==(const LifetimeLattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const LifetimeLattice &Other) const { + return !(*this == Other); + } + + LoanSet getLoans(OriginID OID, LifetimeFactory &Factory) const { + if (auto *Loans = Origins.lookup(OID)) + return *Loans; + return Factory.LoanSetFact.getEmptySet(); + } + + /// Computes the union of two lattices by performing a key-wise merge of + /// their OriginLoanMaps. + LifetimeLattice merge(const LifetimeLattice &Other, + LifetimeFactory &Factory) const { + /// Merge the smaller map into the larger one ensuring we iterate over the + /// smaller map. + if (Origins.getHeight() < Other.Origins.getHeight()) + return Other.merge(*this, Factory); + + OriginLoanMap MergedState = Origins; + // For each origin in the other map, union its loan set with ours. + for (const auto &Entry : Other.Origins) { + OriginID OID = Entry.first; + LoanSet OtherLoanSet = Entry.second; + MergedState = Factory.OriginMapFact.add( + MergedState, OID, + merge(getLoans(OID, Factory), OtherLoanSet, Factory)); + } + return LifetimeLattice(MergedState); + } + + LoanSet merge(LoanSet a, LoanSet b, LifetimeFactory &Factory) const { + /// Merge the smaller set into the larger one ensuring we iterate over the + /// smaller set. + if (a.getHeight() < b.getHeight()) + std::swap(a, b); + LoanSet Result = a; + for (LoanID LID : b) { + /// TODO(opt): Profiling shows that this loop is a major performance + /// bottleneck. Investigate using a BitVector to represent the set of + /// loans for improved merge performance. + Result = Factory.LoanSetFact.add(Result, LID); + } + return Result; + } + + void dump(llvm::raw_ostream &OS) const { + OS << "LifetimeLattice State:\n"; + if (Origins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +// ========================================================================= // +// The Transfer Function +// ========================================================================= // +class Transferer { + FactManager &AllFacts; + LifetimeFactory &Factory; + +public: + explicit Transferer(FactManager &F, LifetimeFactory &Factory) + : AllFacts(F), Factory(Factory) {} + + /// Computes the exit state of a block by applying all its facts sequentially + /// to a given entry state. + /// TODO: We might need to store intermediate states per-fact in the block for + /// later analysis. + LifetimeLattice transferBlock(const CFGBlock *Block, + LifetimeLattice EntryState) { + LifetimeLattice BlockState = EntryState; + llvm::ArrayRef<Fact *> Facts = AllFacts.getFacts(Block); + + for (const Fact *F : Facts) { + BlockState = transferFact(BlockState, F); + } + return BlockState; + } + +private: + LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) { + switch (F->getKind()) { + case Fact::Kind::Issue: + return transfer(In, *F->getAs<IssueFact>()); + case Fact::Kind::AssignOrigin: + return transfer(In, *F->getAs<AssignOriginFact>()); + // Expire and ReturnOfOrigin facts don't modify the Origins and the State. + case Fact::Kind::Expire: + case Fact::Kind::ReturnOfOrigin: + return In; + } + llvm_unreachable("Unknown fact kind"); + } + + /// A new loan is issued to the origin. Old loans are erased. + LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return LifetimeLattice( + Factory.OriginMapFact.add(In.Origins, OID, Factory.createLoanSet(LID))); + } + + /// The destination origin's loan set is replaced by the source's. + /// This implicitly "resets" the old loans of the destination. + LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + LoanSet SrcLoans = InState.getLoans(SrcOID, Factory); + return LifetimeLattice( + Factory.OriginMapFact.add(InState.Origins, DestOID, SrcLoans)); + } +}; +// ========================================================================= // +// Dataflow analysis +// ========================================================================= // + +/// Drives the intra-procedural dataflow analysis. +/// +/// Orchestrates the analysis by iterating over the CFG using a worklist +/// algorithm. It computes a fixed point by propagating the LifetimeLattice +/// state through each block until the state no longer changes. +/// TODO: Maybe use the dataflow framework! +class LifetimeDataflow { + const CFG &Cfg; + AnalysisDeclContext &AC; + LifetimeFactory LifetimeFact; + + Transferer Xfer; + + /// Stores the merged analysis state at the entry of each CFG block. + llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockEntryStates; + /// Stores the analysis state at the exit of each CFG block, after the + /// transfer function has been applied. + llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockExitStates; + +public: + LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC) + : Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {} + + void run() { + llvm::TimeTraceScope TimeProfile("Lifetime Dataflow"); + ForwardDataflowWorklist Worklist(Cfg, AC); + const CFGBlock *Entry = &Cfg.getEntry(); + BlockEntryStates[Entry] = LifetimeLattice{}; + Worklist.enqueueBlock(Entry); + while (const CFGBlock *B = Worklist.dequeue()) { + LifetimeLattice EntryState = getEntryState(B); + LifetimeLattice ExitState = Xfer.transferBlock(B, EntryState); + BlockExitStates[B] = ExitState; + + for (const CFGBlock *Successor : B->succs()) { + auto SuccIt = BlockEntryStates.find(Successor); + LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) + ? SuccIt->second + : LifetimeLattice{}; + LifetimeLattice NewSuccEntryState = + OldSuccEntryState.merge(ExitState, LifetimeFact); + if (SuccIt == BlockEntryStates.end() || + NewSuccEntryState != OldSuccEntryState) { + BlockEntryStates[Successor] = NewSuccEntryState; + Worklist.enqueueBlock(Successor); + } + } + } + } + + void dump() const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Dataflow results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = Cfg.getExit(); + getExitState(&B).dump(llvm::dbgs()); + } + + LifetimeLattice getEntryState(const CFGBlock *B) const { + auto It = BlockEntryStates.find(B); + if (It != BlockEntryStates.end()) { + return It->second; + } + return LifetimeLattice{}; + } + + LifetimeLattice getExitState(const CFGBlock *B) const { + auto It = BlockExitStates.find(B); + if (It != BlockExitStates.end()) { + return It->second; + } + return LifetimeLattice{}; + } +}; + +// ========================================================================= // +// TODO: Analysing dataflow results and error reporting. +// ========================================================================= // +} // anonymous namespace + +void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, + AnalysisDeclContext &AC) { + llvm::TimeTraceScope TimeProfile("Lifetime Analysis"); + FactsContext FactsCtx; + FactGenerator FactGen(Cfg, FactsCtx, AC); + FactGen.run(); + DEBUG_WITH_TYPE("LifetimeFacts", FactsCtx.Facts.dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + LifetimeDataflow Dataflow(Cfg, FactsCtx.Facts, AC); + Dataflow.run(); + DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump()); +} +} // namespace clang \ No newline at end of file diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index d95844cfed614..dd4bede775a40 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -29,6 +29,7 @@ #include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h" #include "clang/Analysis/Analyses/CalledOnceCheck.h" #include "clang/Analysis/Analyses/Consumed.h" +#include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/Analysis/Analyses/ReachableCode.h" #include "clang/Analysis/Analyses/ThreadSafety.h" #include "clang/Analysis/Analyses/UninitializedValues.h" @@ -49,6 +50,7 @@ #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" #include <algorithm> #include <deque> #include <iterator> @@ -2842,6 +2844,12 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( } } + DEBUG_WITH_TYPE( + "ExperimentalLifetimeAnalysis", if (S.getLangOpts().CPlusPlus) { + if (CFG *cfg = AC.getCFG()) { + runLifetimeAnalysis(*cast<DeclContext>(D), *cfg, AC); + } + }); // Check for violations of "called once" parameter properties. if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && shouldAnalyzeCalledOnceParameters(Diags, D->getBeginLoc())) { diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp new file mode 100644 index 0000000000000..466bb7d9c3cd9 --- /dev/null +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -0,0 +1,362 @@ +// RUN: %clang_cc1 -mllvm -debug-only=ExperimentalLifetimeAnalysis,LifetimeFacts,LifetimeDataflow -Wreturn-stack-address-cfg %s 2>&1 | FileCheck %s + +struct MyObj { + int id; + ~MyObj() {} // Non-trivial destructor +}; + +// Simple Local Variable Address and Return +// CHECK-LABEL: Function: return_local_addr +MyObj* return_local_addr() { + MyObj x {10}; + MyObj* p = &x; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_X:[0-9]+]], OriginID: [[O_ADDR_X:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_X]]) + return p; +// CHECK: AssignOrigin (DestID: [[O_RET_VAL:[0-9]+]], SrcID: [[O_P]]) +// CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]]) +// CHECK: Expire (LoanID: [[L_X]]) +} +// CHECK: Dataflow results: + +// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]] +// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]] + + +// Pointer Assignment and Return +// CHECK-LABEL: Function: assign_and_return_local_addr +// CHECK-NEXT: Block B{{[0-9]+}}: +MyObj* assign_and_return_local_addr() { + MyObj y{20}; + MyObj* ptr1 = &y; +// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], OriginID: [[O_ADDR_Y:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_PTR1:[0-9]+]], SrcID: [[O_ADDR_Y]]) + MyObj* ptr2 = ptr1; +// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL:[0-9]+]], SrcID: [[O_PTR1]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2:[0-9]+]], SrcID: [[O_PTR1_RVAL]]) + ptr2 = ptr1; +// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL_2:[0-9]+]], SrcID: [[O_PTR1]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR1_RVAL_2]]) + ptr2 = ptr2; // Self assignment. +// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL:[0-9]+]], SrcID: [[O_PTR2]]) +// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR2_RVAL]]) + return ptr2; +// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL_2:[0-9]+]], SrcID: [[O_PTR2]]) +// CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) +// CHECK: Expire (LoanID: [[L_Y]]) +} +// CHECK: Dataflow results: + +// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR1_RVAL]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR2_RVAL]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] contains Loan [[L_Y]] + + +// Return of Non-Pointer Type +// CHECK-LABEL: Function: return_int_val +// CHECK-NEXT: Block B{{[0-9]+}}: +int return_int_val() { + int x = 10; + return x; +} +// CHECK-NEXT: End of Block +// CHECK: Dataflow results: + +// CHECK: <empty> + + +// Loan Expiration (Automatic Variable, C++) +// CHECK-LABEL: Function: loan_expires_cpp +// CHECK-NEXT: Block B{{[0-9]+}}: +void loan_expires_cpp() { + MyObj obj{1}; + MyObj* pObj = &obj; +// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], OriginID: [[O_ADDR_OBJ:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) +// CHECK: Expire (LoanID: [[L_OBJ]]) +} +// CHECK: Dataflow results: + +// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]] +// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]] + + +// FIXME: No expire for Trivial Destructors +// CHECK-LABEL: Function: loan_expires_trivial +// CHECK-NEXT: Block B{{[0-9]+}}: +void loan_expires_trivial() { + int trivial_obj = 1; + int* pTrivialObj = &trivial_obj; +// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], OriginID: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_PTOBJ:[0-9]+]], SrcID: [[O_ADDR_TRIVIAL_OBJ]]) +// CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]]) +// CHECK-NEXT: End of Block + // FIXME: Add check for Expire once trivial destructors are handled for expiration. +} +// CHECK: Dataflow results: + +// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]] +// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]] + + +// CHECK-LABEL: Function: conditional +void conditional(bool condition) { + int a = 5; + int b = 10; + int* p = nullptr; + + if (condition) + p = &a; + // CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]]) + else + p = &b; + // CHECK: Issue (LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]]) +} +// CHECK: Dataflow results: + +// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]] +// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]] + + +// CHECK-LABEL: Function: pointers_in_a_cycle +void pointers_in_a_cycle(bool condition) { + MyObj v1{1}; + MyObj v2{1}; + MyObj v3{1}; + + MyObj* p1 = &v1; + MyObj* p2 = &v2; + MyObj* p3 = &v3; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_V1:[0-9]+]], OriginID: [[O_ADDR_V1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P1:[0-9]+]], SrcID: [[O_ADDR_V1]]) +// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], OriginID: [[O_ADDR_V2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P2:[0-9]+]], SrcID: [[O_ADDR_V2]]) +// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], OriginID: [[O_ADDR_V3:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P3:[0-9]+]], SrcID: [[O_ADDR_V3]]) + + while (condition) { + MyObj* temp = p1; + p1 = p2; + p2 = p3; + p3 = temp; +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (DestID: [[O_P1_RVAL:[0-9]+]], SrcID: [[O_P1]]) +// CHECK: AssignOrigin (DestID: [[O_TEMP:[0-9]+]], SrcID: [[O_P1_RVAL]]) +// CHECK: AssignOrigin (DestID: [[O_P2_RVAL:[0-9]+]], SrcID: [[O_P2]]) +// CHECK: AssignOrigin (DestID: [[O_P1]], SrcID: [[O_P2_RVAL]]) +// CHECK: AssignOrigin (DestID: [[O_P3_RVAL:[0-9]+]], SrcID: [[O_P3]]) +// CHECK: AssignOrigin (DestID: [[O_P2]], SrcID: [[O_P3_RVAL]]) +// CHECK: AssignOrigin (DestID: [[O_TEMP_RVAL:[0-9]+]], SrcID: [[O_TEMP]]) +// CHECK: AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]]) + } +} +// At the end of the analysis, the origins for the pointers involved in the cycle +// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point. +// CHECK: Dataflow results: + +// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_ADDR_V1]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_ADDR_V2]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_ADDR_V3]] contains Loan [[L_V3]] + + +// CHECK-LABEL: Function: overwrite_origin +void overwrite_origin() { + MyObj s1; + MyObj s2; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + p = &s2; +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) +// CHECK: Expire (LoanID: [[L_S2]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} +// CHECK: Dataflow results: + +// CHECK: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]] + + +// CHECK-LABEL: Function: reassign_to_null +void reassign_to_null() { + MyObj s1; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + p = nullptr; +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} +// FIXME: Have a better representation for nullptr than just an empty origin. +// It should be a separate loan and origin kind. +// CHECK: Dataflow results: + +// CHECK: Origin [[O_P]] contains no loans + + +// CHECK-LABEL: Function: reassign_in_if +void reassign_in_if(bool condition) { + MyObj s1; + MyObj s2; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + if (condition) { + p = &s2; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + } +// CHECK: Block B{{[0-9]+}}: +// CHECK: Expire (LoanID: [[L_S2]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} +// CHECK: Dataflow results: + +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] + + +// CHECK-LABEL: Function: assign_in_switch +void assign_in_switch(int mode) { + MyObj s1; + MyObj s2; + MyObj s3; + MyObj* p = nullptr; +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) + switch (mode) { + case 1: + p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S1]]) + break; + case 2: + p = &s2; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + break; + default: + p = &s3; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], OriginID: [[O_ADDR_S3:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S3]]) + break; + } +// CHECK: Block B{{[0-9]+}}: +// CHECK-DAG: Expire (LoanID: [[L_S3]]) +// CHECK-DAG: Expire (LoanID: [[L_S2]]) +// CHECK-DAG: Expire (LoanID: [[L_S1]]) +} +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]] +// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_ADDR_S3]] contains Loan [[L_S3]] + + +// CHECK-LABEL: Function: loan_in_loop +void loan_in_loop(bool condition) { + MyObj* p = nullptr; + // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) + while (condition) { + MyObj inner; + p = &inner; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) +// CHECK: Expire (LoanID: [[L_INNER]]) + } +} +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] +// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] + + +// CHECK-LABEL: Function: loop_with_break +void loop_with_break(int count) { + MyObj s1; + MyObj s2; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + for (int i = 0; i < count; ++i) { + if (i == 5) { + p = &s2; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + break; + } + } +// CHECK: Block B{{[0-9]+}}: +// CHECK: Expire (LoanID: [[L_S2]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} + +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] + + +// CHECK-LABEL: Function: nested_scopes +void nested_scopes() { + MyObj* p = nullptr; +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) + { + MyObj outer; + p = &outer; +// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], OriginID: [[O_ADDR_OUTER:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_OUTER]]) + { + MyObj inner; + p = &inner; +// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) + } +// CHECK: Expire (LoanID: [[L_INNER]]) + } +// CHECK: Expire (LoanID: [[L_OUTER]]) +} + +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] +// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] +// CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]] >From 44fcea1b6c0c84205ca9eec231978e33099c95f5 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Thu, 12 Jun 2025 12:54:41 +0000 Subject: [PATCH 2/2] address comments --- clang/lib/Analysis/LifetimeSafety.cpp | 381 ++++++++++-------- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +- .../Sema/warn-lifetime-safety-dataflow.cpp | 28 +- 3 files changed, 219 insertions(+), 194 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 8dbb4dc37026c..1602b52c80ccd 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -6,159 +6,187 @@ #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableMap.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TimeProfiler.h" -#include <vector> +#include <cstdint> namespace clang { namespace { -struct Point { - const clang::CFGBlock *Block; - /// Index into Block->Elements(). - unsigned ElementIndex; - - Point(const clang::CFGBlock *B = nullptr, unsigned Idx = 0) - : Block(B), ElementIndex(Idx) {} - - bool operator==(const Point &Other) const { - return Block == Other.Block && ElementIndex == Other.ElementIndex; - } -}; - /// Represents the storage location being borrowed, e.g., a specific stack /// variable. -/// TODO: Handle member accesseslike `s.y`. -struct Path { +struct AccessPath { const clang::ValueDecl *D; enum class Kind : uint8_t { StackVariable, - Heap, // TODO: Handle. - Field, // TODO: Handle. - ArrayElement, // TODO: Handle. - TemporaryObject, // TODO: Handle. - StaticOrGlobal, // TODO: Handle. + Temporary, // TODO: Handle. + Field, // TODO: Handle like `s.y`. + Heap, // TODO: Handle. + ArrayElement, // TODO: Handle. + Static, // TODO: Handle. }; Kind PathKind; - Path(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {} + AccessPath(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {} +}; + +/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. +/// Used for giving ID to loans and origins. +template <typename Tag> struct ID { + uint32_t Value = 0; + + bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; } + bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); } + bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; } + ID<Tag> &operator++() { + ++Value; + return *this; + } + void Profile(llvm::FoldingSetNodeID &IDBuilder) const { + IDBuilder.AddInteger(Value); + } }; -using LoanID = uint32_t; -using OriginID = uint32_t; +template <typename Tag> +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) { + return OS << ID.Value; +} + +struct LoanTag {}; +struct OriginTag {}; + +using LoanID = ID<LoanTag>; +using OriginID = ID<OriginTag>; /// Information about a single borrow, or "Loan". A loan is created when a /// reference or pointer is taken. -struct LoanInfo { +struct Loan { /// TODO: Represent opaque loans. /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it /// is represented as empty LoanSet LoanID ID; - Path SourcePath; + AccessPath Path; SourceLocation IssueLoc; - LoanInfo(LoanID id, Path path, SourceLocation loc) - : ID(id), SourcePath(path), IssueLoc(loc) {} + Loan(LoanID id, AccessPath path, SourceLocation loc) + : ID(id), Path(path), IssueLoc(loc) {} }; -enum class OriginKind : uint8_t { Variable, ExpressionResult }; - /// An Origin is a symbolic identifier that represents the set of possible /// loans a pointer-like object could hold at any given time. /// TODO: Also represent Origins of complex types (fields, inner types). -struct OriginInfo { +struct Origin { OriginID ID; - OriginKind Kind; - union { - const clang::ValueDecl *Decl; - const clang::Expr *Expression; - }; - OriginInfo(OriginID id, OriginKind kind, const clang::ValueDecl *D) - : ID(id), Kind(kind), Decl(D) {} - OriginInfo(OriginID id, OriginKind kind, const clang::Expr *E) - : ID(id), Kind(kind), Expression(E) {} + llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr; + + Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {} + Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {} + + const clang::ValueDecl *getDecl() const { + return Ptr.dyn_cast<const clang::ValueDecl *>(); + } + const clang::Expr *getExpr() const { + return Ptr.dyn_cast<const clang::Expr *>(); + } }; class LoanManager { public: LoanManager() = default; - LoanInfo &addLoanInfo(Path path, SourceLocation loc) { - NextLoanIDVal++; - AllLoans.emplace_back(NextLoanIDVal, path, loc); + Loan &addLoan(AccessPath path, SourceLocation loc) { + ++NextLoanID; + AllLoans.emplace_back(NextLoanID, path, loc); return AllLoans.back(); } - const LoanInfo *getLoanInfo(LoanID id) const { - if (id < AllLoans.size()) - return &AllLoans[id]; - return nullptr; + const Loan &getLoan(LoanID id) const { + assert(id.Value < AllLoans.size()); + return AllLoans[id.Value]; } - llvm::ArrayRef<LoanInfo> getLoanInfos() const { return AllLoans; } + llvm::ArrayRef<Loan> getLoans() const { return AllLoans; } private: - LoanID NextLoanIDVal = 0; - llvm::SmallVector<LoanInfo> AllLoans; + LoanID NextLoanID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector<Loan> AllLoans; }; class OriginManager { public: OriginManager() = default; - OriginID getNextOriginID() { return NextOriginIDVal++; } - OriginInfo &addOriginInfo(OriginID id, const clang::ValueDecl *D) { - assert(D != nullptr); - AllOrigins.emplace_back(id, OriginKind::Variable, D); + OriginID getNextOriginID() { return ++NextOriginID; } + Origin &addOrigin(OriginID id, const clang::ValueDecl &D) { + AllOrigins.emplace_back(id, &D); return AllOrigins.back(); } - OriginInfo &addOriginInfo(OriginID id, const clang::Expr *E) { - assert(E != nullptr); - AllOrigins.emplace_back(id, OriginKind::ExpressionResult, E); + Origin &addOrigin(OriginID id, const clang::Expr &E) { + AllOrigins.emplace_back(id, &E); return AllOrigins.back(); } - OriginID getOrCreate(const Expr *E) { - auto It = ExprToOriginID.find(E); + OriginID get(const Expr &E) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) { + // Origin of DeclRefExpr is that of the declaration it refers to. + return get(*DRE->getDecl()); + } + auto It = ExprToOriginID.find(&E); + assert(It != ExprToOriginID.end()); + return It->second; + } + + OriginID get(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); + assert(It != DeclToOriginID.end()); + return It->second; + } + + OriginID getOrCreate(const Expr &E) { + auto It = ExprToOriginID.find(&E); if (It != ExprToOriginID.end()) return It->second; - if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) { // Origin of DeclRefExpr is that of the declaration it refers to. - return getOrCreate(DRE->getDecl()); + return getOrCreate(*DRE->getDecl()); } OriginID NewID = getNextOriginID(); - addOriginInfo(NewID, E); - ExprToOriginID[E] = NewID; + addOrigin(NewID, E); + ExprToOriginID[&E] = NewID; return NewID; } - const OriginInfo *getOriginInfo(OriginID id) const { - if (id < AllOrigins.size()) - return &AllOrigins[id]; - return nullptr; + const Origin &getOrigin(OriginID ID) const { + assert(ID.Value < AllOrigins.size()); + return AllOrigins[ID.Value]; } - llvm::ArrayRef<OriginInfo> getOriginInfos() const { return AllOrigins; } + llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; } - OriginID getOrCreate(const ValueDecl *D) { - auto It = DeclToOriginID.find(D); + OriginID getOrCreate(const ValueDecl &D) { + auto It = DeclToOriginID.find(&D); if (It != DeclToOriginID.end()) return It->second; OriginID NewID = getNextOriginID(); - addOriginInfo(NewID, D); - DeclToOriginID[D] = NewID; + addOrigin(NewID, D); + DeclToOriginID[&D] = NewID; return NewID; } private: - OriginID NextOriginIDVal = 0; - llvm::SmallVector<OriginInfo> AllOrigins; + OriginID NextOriginID{0}; + /// TODO(opt): Profile and evaluate the usefullness of small buffer + /// optimisation. + llvm::SmallVector<Origin> AllOrigins; llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID; llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID; }; @@ -183,13 +211,11 @@ class Fact { Kind K; protected: - Point P; - Fact(Kind K, Point Pt) : K(K), P(Pt) {} + Fact(Kind K) : K(K) {} public: virtual ~Fact() = default; Kind getKind() const { return K; } - Point getPoint() const { return P; } template <typename T> const T *getAs() const { if (T::classof(this)) @@ -198,8 +224,7 @@ class Fact { } virtual void dump(llvm::raw_ostream &OS) const { - OS << "Fact (Kind: " << static_cast<int>(K) << ", Point: B" - << P.Block->getBlockID() << ":" << P.ElementIndex << ")\n"; + OS << "Fact (Kind: " << static_cast<int>(K) << ")\n"; } }; @@ -208,11 +233,11 @@ class IssueFact : public Fact { OriginID OID; public: - IssueFact(LoanID LID, OriginID OID, Point Pt) - : Fact(Kind::Issue, Pt), LID(LID), OID(OID) {} + static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } + + IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {} LoanID getLoanID() const { return LID; } OriginID getOriginID() const { return OID; } - static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; } void dump(llvm::raw_ostream &OS) const override { OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID() << ")\n"; @@ -223,9 +248,10 @@ class ExpireFact : public Fact { LoanID LID; public: - ExpireFact(LoanID LID, Point Pt) : Fact(Kind::Expire, Pt), LID(LID) {} - LoanID getLoanID() const { return LID; } static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; } + + ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {} + LoanID getLoanID() const { return LID; } void dump(llvm::raw_ostream &OS) const override { OS << "Expire (LoanID: " << getLoanID() << ")\n"; } @@ -236,13 +262,14 @@ class AssignOriginFact : public Fact { OriginID OIDSrc; public: - AssignOriginFact(OriginID OIDDest, OriginID OIDSrc, Point Pt) - : Fact(Kind::AssignOrigin, Pt), OIDDest(OIDDest), OIDSrc(OIDSrc) {} - OriginID getDestOriginID() const { return OIDDest; } - OriginID getSrcOriginID() const { return OIDSrc; } static bool classof(const Fact *F) { return F->getKind() == Kind::AssignOrigin; } + + AssignOriginFact(OriginID OIDDest, OriginID OIDSrc) + : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {} + OriginID getDestOriginID() const { return OIDDest; } + OriginID getSrcOriginID() const { return OIDSrc; } void dump(llvm::raw_ostream &OS) const override { OS << "AssignOrigin (DestID: " << getDestOriginID() << ", SrcID: " << getSrcOriginID() << ")\n"; @@ -253,12 +280,12 @@ class ReturnOfOriginFact : public Fact { OriginID OID; public: - ReturnOfOriginFact(OriginID OID, Point Pt) - : Fact(Kind::ReturnOfOrigin, Pt), OID(OID) {} - OriginID getReturnedOriginID() const { return OID; } static bool classof(const Fact *F) { return F->getKind() == Kind::ReturnOfOrigin; } + + ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {} + OriginID getReturnedOriginID() const { return OID; } void dump(llvm::raw_ostream &OS) const override { OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n"; } @@ -266,15 +293,17 @@ class ReturnOfOriginFact : public Fact { class FactManager { public: - llvm::ArrayRef<Fact *> getFacts(const CFGBlock *B) const { + llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const { auto It = BlockToFactsMap.find(B); if (It != BlockToFactsMap.end()) return It->second; return {}; } - void addFact(const CFGBlock *B, Fact *NewFact) { - BlockToFactsMap[B].push_back(NewFact); + void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { + if (!NewFacts.empty()) { + BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); + } } template <typename FactType, typename... Args> @@ -308,75 +337,70 @@ class FactManager { } } + LoanManager &getLoanMgr() { return LoanMgr; } + OriginManager &getOriginMgr() { return OriginMgr; } + private: - llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<Fact *>> + LoanManager LoanMgr; + OriginManager OriginMgr; + llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>> BlockToFactsMap; llvm::BumpPtrAllocator FactAllocator; }; -struct FactsContext { - FactManager Facts; - LoanManager Loans; - OriginManager Origins; -}; - class FactGenerator : public ConstStmtVisitor<FactGenerator> { public: - FactGenerator(const CFG &Cfg, FactsContext &FactsCtx, AnalysisDeclContext &AC) - : FactsCtx(FactsCtx), Cfg(Cfg), AC(AC), CurrentBlock(nullptr) {} + FactGenerator(const CFG &Cfg, FactManager &FactMgr, AnalysisDeclContext &AC) + : FactMgr(FactMgr), Cfg(Cfg), AC(AC) {} void run() { - llvm::TimeTraceScope TimeProfile("Fact Generation"); - // Iterate through the CFG blocks in pre-order to ensure that + llvm::TimeTraceScope TimeProfile("FactGenerator"); + // Iterate through the CFG blocks in reverse post-order to ensure that // initializations and destructions are processed in the correct sequence. - // TODO: A pre-order traversal utility should be provided by Dataflow - // framework. + // TODO: A reverse post-order traversal utility should be provided by + // Dataflow framework. ForwardDataflowWorklist Worklist(Cfg, AC); for (const CFGBlock *B : Cfg.const_nodes()) Worklist.enqueueBlock(B); while (const CFGBlock *Block = Worklist.dequeue()) { - CurrentBlock = Block; + CurrentBlockFacts.clear(); for (unsigned I = 0; I < Block->size(); ++I) { const CFGElement &Element = Block->Elements[I]; - CurrentPoint = Point(Block, I); if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) Visit(CS->getStmt()); else if (std::optional<CFGAutomaticObjDtor> DtorOpt = Element.getAs<CFGAutomaticObjDtor>()) handleDestructor(*DtorOpt); } + FactMgr.addBlockFacts(Block, CurrentBlockFacts); } } void VisitDeclStmt(const DeclStmt *DS) { - for (const Decl *D : DS->decls()) { - if (const auto *VD = dyn_cast<VarDecl>(D)) { - if (hasOrigin(VD->getType())) { - if (const Expr *InitExpr = VD->getInit()) { - OriginID DestOID = FactsCtx.Origins.getOrCreate(VD); - OriginID SrcOID = FactsCtx.Origins.getOrCreate(InitExpr); - FactsCtx.Facts.addFact(CurrentBlock, - FactsCtx.Facts.createFact<AssignOriginFact>( - DestOID, SrcOID, CurrentPoint)); - } - } - } - } + for (const Decl *D : DS->decls()) + if (const auto *VD = dyn_cast<VarDecl>(D)) + if (hasOrigin(VD->getType())) + if (const Expr *InitExpr = VD->getInit()) + addAssignOriginFact(*VD, *InitExpr); + } + + void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) { + /// TODO: Handle nullptr expr as a special 'null' loan. Uninintialed + /// pointers can use the same type of loan. + FactMgr.getOriginMgr().getOrCreate(*N); } void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { if (!hasOrigin(ICE->getType())) return; + Visit(ICE->getSubExpr()); + /// TODO: Consider if this is actually useful in practice. Alternatively, we + /// could directly use the sub-expression's OriginID instead of creating a + /// new one. // An ImplicitCastExpr node itself gets an origin, which flows from the // origin of its sub-expression (after stripping its own parens/casts). - if (ICE->getCastKind() == CK_LValueToRValue) { - OriginID IceOID = FactsCtx.Origins.getOrCreate(ICE); - OriginID SubExprOID = FactsCtx.Origins.getOrCreate(ICE->getSubExpr()); - FactsCtx.Facts.addFact(CurrentBlock, - FactsCtx.Facts.createFact<AssignOriginFact>( - IceOID, SubExprOID, CurrentPoint)); - } + addAssignOriginFact(*ICE, *ICE->getSubExpr()); } void VisitUnaryOperator(const UnaryOperator *UO) { @@ -386,13 +410,12 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { // Check if it's a local variable. if (VD->hasLocalStorage()) { - OriginID OID = FactsCtx.Origins.getOrCreate(UO); - Path AddrOfLocalVarPath(VD, Path::Kind::StackVariable); - LoanInfo &Loan = FactsCtx.Loans.addLoanInfo(AddrOfLocalVarPath, - UO->getOperatorLoc()); - FactsCtx.Facts.addFact(CurrentBlock, - FactsCtx.Facts.createFact<IssueFact>( - Loan.ID, OID, CurrentPoint)); + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); + AccessPath AddrOfLocalVarPath(VD, AccessPath::Kind::StackVariable); + Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, + UO->getOperatorLoc()); + CurrentBlockFacts.push_back( + FactMgr.createFact<IssueFact>(L.ID, OID)); } } } @@ -402,10 +425,9 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { void VisitReturnStmt(const ReturnStmt *RS) { if (const Expr *RetExpr = RS->getRetValue()) { if (hasOrigin(RetExpr->getType())) { - OriginID OID = FactsCtx.Origins.getOrCreate(RetExpr); - FactsCtx.Facts.addFact( - CurrentBlock, - FactsCtx.Facts.createFact<ReturnOfOriginFact>(OID, CurrentPoint)); + OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr); + CurrentBlockFacts.push_back( + FactMgr.createFact<ReturnOfOriginFact>(OID)); } } } @@ -419,17 +441,11 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { // LHS must be a pointer/reference type that can be an origin. // RHS must also represent an origin (either another pointer/ref or an // address-of). - if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) { + if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) if (const auto *VD_LHS = dyn_cast<ValueDecl>(DRE_LHS->getDecl()->getCanonicalDecl()); - VD_LHS && hasOrigin(VD_LHS->getType())) { - OriginID DestOID = FactsCtx.Origins.getOrCreate(VD_LHS); - OriginID SrcOID = FactsCtx.Origins.getOrCreate(RHSExpr); - FactsCtx.Facts.addFact(CurrentBlock, - FactsCtx.Facts.createFact<AssignOriginFact>( - DestOID, SrcOID, CurrentPoint)); - } - } + VD_LHS && hasOrigin(VD_LHS->getType())) + addAssignOriginFact(*VD_LHS, *RHSExpr); } } @@ -437,33 +453,40 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { // Check if a type have an origin. bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); } + template <typename Destination, typename Source> + void addAssignOriginFact(const Destination &D, const Source &S) { + OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D); + OriginID SrcOID = FactMgr.getOriginMgr().get(S); + CurrentBlockFacts.push_back( + FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID)); + } + void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) { /// TODO: Also handle trivial destructors (e.g., for `int` - // variables) which will never have a CFGAutomaticObjDtor node. + /// variables) which will never have a CFGAutomaticObjDtor node. /// TODO: Handle loans to temporaries. const VarDecl *DestructedVD = DtorOpt.getVarDecl(); if (!DestructedVD) return; // Iterate through all loans to see if any expire. - for (const LoanInfo &Loan : FactsCtx.Loans.getLoanInfos()) { - const Path &LoanPath = Loan.SourcePath; + /// TODO(opt): Do better than a linear search to find loans associated with + /// 'DestructedVD'. + for (const Loan &L : FactMgr.getLoanMgr().getLoans()) { + const AccessPath &LoanPath = L.Path; // Check if the loan is for a stack variable and if that variable // is the one being destructed. - if (LoanPath.PathKind == Path::Kind::StackVariable) { + if (LoanPath.PathKind == AccessPath::Kind::StackVariable) { if (LoanPath.D == DestructedVD) { - FactsCtx.Facts.addFact( - CurrentBlock, - FactsCtx.Facts.createFact<ExpireFact>(Loan.ID, CurrentPoint)); + CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID)); } } } } - FactsContext &FactsCtx; + FactManager &FactMgr; const CFG &Cfg; AnalysisDeclContext &AC; - const CFGBlock *CurrentBlock; - Point CurrentPoint; + llvm::SmallVector<Fact *> CurrentBlockFacts; }; // ========================================================================= // @@ -476,7 +499,7 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { using LoanSet = llvm::ImmutableSet<LoanID>; using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; -/// A context object to hold the factories for immutable collections, ensuring +/// An object to hold the factories for immutable collections, ensuring /// that all created states share the same underlying memory management. struct LifetimeFactory { OriginLoanMap::Factory OriginMapFact; @@ -512,28 +535,28 @@ struct LifetimeLattice { return Factory.LoanSetFact.getEmptySet(); } - /// Computes the union of two lattices by performing a key-wise merge of + /// Computes the union of two lattices by performing a key-wise join of /// their OriginLoanMaps. - LifetimeLattice merge(const LifetimeLattice &Other, - LifetimeFactory &Factory) const { + LifetimeLattice join(const LifetimeLattice &Other, + LifetimeFactory &Factory) const { /// Merge the smaller map into the larger one ensuring we iterate over the /// smaller map. if (Origins.getHeight() < Other.Origins.getHeight()) - return Other.merge(*this, Factory); + return Other.join(*this, Factory); - OriginLoanMap MergedState = Origins; + OriginLoanMap JoinedState = Origins; // For each origin in the other map, union its loan set with ours. for (const auto &Entry : Other.Origins) { OriginID OID = Entry.first; LoanSet OtherLoanSet = Entry.second; - MergedState = Factory.OriginMapFact.add( - MergedState, OID, - merge(getLoans(OID, Factory), OtherLoanSet, Factory)); + JoinedState = Factory.OriginMapFact.add( + JoinedState, OID, + join(getLoans(OID, Factory), OtherLoanSet, Factory)); } - return LifetimeLattice(MergedState); + return LifetimeLattice(JoinedState); } - LoanSet merge(LoanSet a, LoanSet b, LifetimeFactory &Factory) const { + LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const { /// Merge the smaller set into the larger one ensuring we iterate over the /// smaller set. if (a.getHeight() < b.getHeight()) @@ -542,7 +565,7 @@ struct LifetimeLattice { for (LoanID LID : b) { /// TODO(opt): Profiling shows that this loop is a major performance /// bottleneck. Investigate using a BitVector to represent the set of - /// loans for improved merge performance. + /// loans for improved join performance. Result = Factory.LoanSetFact.add(Result, LID); } return Result; @@ -579,7 +602,7 @@ class Transferer { LifetimeLattice transferBlock(const CFGBlock *Block, LifetimeLattice EntryState) { LifetimeLattice BlockState = EntryState; - llvm::ArrayRef<Fact *> Facts = AllFacts.getFacts(Block); + llvm::ArrayRef<const Fact *> Facts = AllFacts.getFacts(Block); for (const Fact *F : Facts) { BlockState = transferFact(BlockState, F); @@ -664,7 +687,7 @@ class LifetimeDataflow { ? SuccIt->second : LifetimeLattice{}; LifetimeLattice NewSuccEntryState = - OldSuccEntryState.merge(ExitState, LifetimeFact); + OldSuccEntryState.join(ExitState, LifetimeFact); if (SuccIt == BlockEntryStates.end() || NewSuccEntryState != OldSuccEntryState) { BlockEntryStates[Successor] = NewSuccEntryState; @@ -707,10 +730,12 @@ class LifetimeDataflow { void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, AnalysisDeclContext &AC) { llvm::TimeTraceScope TimeProfile("Lifetime Analysis"); - FactsContext FactsCtx; - FactGenerator FactGen(Cfg, FactsCtx, AC); + DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), + /*ShowColors=*/true)); + FactManager FactMgr; + FactGenerator FactGen(Cfg, FactMgr, AC); FactGen.run(); - DEBUG_WITH_TYPE("LifetimeFacts", FactsCtx.Facts.dump(Cfg, AC)); + DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); /// TODO(opt): Consider optimizing individual blocks before running the /// dataflow analysis. @@ -721,8 +746,8 @@ void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, /// blocks; only Decls are visible. Therefore, loans in a block that /// never reach an Origin associated with a Decl can be safely dropped by /// the analysis. - LifetimeDataflow Dataflow(Cfg, FactsCtx.Facts, AC); + LifetimeDataflow Dataflow(Cfg, FactMgr, AC); Dataflow.run(); DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump()); } -} // namespace clang \ No newline at end of file +} // namespace clang diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index dd4bede775a40..a6e82e9572df4 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2845,7 +2845,9 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( } DEBUG_WITH_TYPE( - "ExperimentalLifetimeAnalysis", if (S.getLangOpts().CPlusPlus) { + "ExperimentalLifetimeAnalysis", + // TODO: Enable for other languages once the analysis is stable. + if (S.getLangOpts().CPlusPlus) { if (CFG *cfg = AC.getCFG()) { runLifetimeAnalysis(*cast<DeclContext>(D), *cfg, AC); } diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 466bb7d9c3cd9..2d732e56c0872 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -mllvm -debug-only=ExperimentalLifetimeAnalysis,LifetimeFacts,LifetimeDataflow -Wreturn-stack-address-cfg %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -mllvm -debug-only=ExperimentalLifetimeAnalysis,LifetimeFacts,LifetimeDataflow %s 2>&1 | FileCheck %s struct MyObj { int id; @@ -19,7 +19,6 @@ MyObj* return_local_addr() { // CHECK: Expire (LoanID: [[L_X]]) } // CHECK: Dataflow results: - // CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]] // CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]] @@ -48,7 +47,6 @@ MyObj* assign_and_return_local_addr() { // CHECK: Expire (LoanID: [[L_Y]]) } // CHECK: Dataflow results: - // CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]] // CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]] // CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]] @@ -67,7 +65,6 @@ int return_int_val() { } // CHECK-NEXT: End of Block // CHECK: Dataflow results: - // CHECK: <empty> @@ -82,7 +79,6 @@ void loan_expires_cpp() { // CHECK: Expire (LoanID: [[L_OBJ]]) } // CHECK: Dataflow results: - // CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]] // CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]] @@ -100,7 +96,6 @@ void loan_expires_trivial() { // FIXME: Add check for Expire once trivial destructors are handled for expiration. } // CHECK: Dataflow results: - // CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]] // CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]] @@ -119,13 +114,17 @@ void conditional(bool condition) { p = &b; // CHECK: Issue (LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]]) // CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]]) + int *q = p; + // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) + // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]]) } // CHECK: Dataflow results: - // CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]] -// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]] // CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]] +// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_A]] +// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_B]] // CHECK-LABEL: Function: pointers_in_a_cycle @@ -164,7 +163,6 @@ void pointers_in_a_cycle(bool condition) { // At the end of the analysis, the origins for the pointers involved in the cycle // (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point. // CHECK: Dataflow results: - // CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]] // CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]] // CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]] @@ -197,7 +195,6 @@ void overwrite_origin() { // CHECK: Expire (LoanID: [[L_S1]]) } // CHECK: Dataflow results: - // CHECK: Origin [[O_P]] contains Loan [[L_S2]] // CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]] @@ -216,7 +213,6 @@ void reassign_to_null() { // FIXME: Have a better representation for nullptr than just an empty origin. // It should be a separate loan and origin kind. // CHECK: Dataflow results: - // CHECK: Origin [[O_P]] contains no loans @@ -239,7 +235,6 @@ void reassign_in_if(bool condition) { // CHECK: Expire (LoanID: [[L_S1]]) } // CHECK: Dataflow results: - // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] // CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] // CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] @@ -253,7 +248,8 @@ void assign_in_switch(int mode) { MyObj s3; MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) switch (mode) { case 1: p = &s1; @@ -291,7 +287,8 @@ void assign_in_switch(int mode) { // CHECK-LABEL: Function: loan_in_loop void loan_in_loop(bool condition) { MyObj* p = nullptr; - // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) while (condition) { MyObj inner; p = &inner; @@ -339,7 +336,8 @@ void loop_with_break(int count) { void nested_scopes() { MyObj* p = nullptr; // CHECK: Block B{{[0-9]+}}: -// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) { MyObj outer; p = &outer; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits