https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/147295
>From 2e4261b02b6230a8c79f01a673cc3030cfff3ea7 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Sun, 6 Jul 2025 19:12:55 +0000 Subject: [PATCH 1/6] [LifetimeSafety] Propagate loans using dataflow analysis --- clang/lib/Analysis/LifetimeSafety.cpp | 255 +++++++++++++++++- .../Sema/warn-lifetime-safety-dataflow.cpp | 186 +++++++++++++ 2 files changed, 440 insertions(+), 1 deletion(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 3fe30e36ebd0f..7870352f0287a 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -491,7 +491,247 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { }; // ========================================================================= // -// TODO: Run dataflow analysis to propagate loans, analyse and error reporting. +// The Dataflow Lattice +// ========================================================================= // + +// Using LLVM's immutable collections is efficient for dataflow analysis +// as it avoids deep copies during state transitions. +// TODO(opt): Consider using a bitset to represent the set of loans. +using LoanSet = llvm::ImmutableSet<LoanID>; +using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>; + +/// An object to hold the factories for immutable collections, ensuring +/// that all created states share the same underlying memory management. +struct LifetimeFactory { + OriginLoanMap::Factory OriginMapFact; + LoanSet::Factory LoanSetFact; + + LoanSet createLoanSet(LoanID LID) { + return LoanSetFact.add(LoanSetFact.getEmptySet(), LID); + } +}; + +/// LifetimeLattice represents the state of our analysis at a given program +/// point. It is an immutable object, and all operations produce a new +/// instance rather than modifying the existing one. +struct LifetimeLattice { + /// The map from an origin to the set of loans it contains. + /// TODO(opt): To reduce the lattice size, propagate origins of declarations, + /// not expressions, because expressions are not visible across blocks. + OriginLoanMap Origins = OriginLoanMap(nullptr); + + explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {} + LifetimeLattice() = default; + + bool operator==(const LifetimeLattice &Other) const { + return Origins == Other.Origins; + } + bool operator!=(const LifetimeLattice &Other) const { + return !(*this == Other); + } + + LoanSet getLoans(OriginID OID, LifetimeFactory &Factory) const { + if (auto *Loans = Origins.lookup(OID)) + return *Loans; + return Factory.LoanSetFact.getEmptySet(); + } + + /// Computes the union of two lattices by performing a key-wise join of + /// their OriginLoanMaps. + // TODO(opt): This key-wise join is a performance bottleneck. A more + // efficient merge could be implemented using a Patricia Trie or HAMT + // instead of the current AVL-tree-based ImmutableMap. + LifetimeLattice join(const LifetimeLattice &Other, + LifetimeFactory &Factory) const { + /// Merge the smaller map into the larger one ensuring we iterate over the + /// smaller map. + if (Origins.getHeight() < Other.Origins.getHeight()) + return Other.join(*this, Factory); + + OriginLoanMap JoinedState = Origins; + // For each origin in the other map, union its loan set with ours. + for (const auto &Entry : Other.Origins) { + OriginID OID = Entry.first; + LoanSet OtherLoanSet = Entry.second; + JoinedState = Factory.OriginMapFact.add( + JoinedState, OID, + join(getLoans(OID, Factory), OtherLoanSet, Factory)); + } + return LifetimeLattice(JoinedState); + } + + LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const { + /// Merge the smaller set into the larger one ensuring we iterate over the + /// smaller set. + if (a.getHeight() < b.getHeight()) + std::swap(a, b); + LoanSet Result = a; + for (LoanID LID : b) { + /// TODO(opt): Profiling shows that this loop is a major performance + /// bottleneck. Investigate using a BitVector to represent the set of + /// loans for improved join performance. + Result = Factory.LoanSetFact.add(Result, LID); + } + return Result; + } + + void dump(llvm::raw_ostream &OS) const { + OS << "LifetimeLattice State:\n"; + if (Origins.isEmpty()) + OS << " <empty>\n"; + for (const auto &Entry : Origins) { + if (Entry.second.isEmpty()) + OS << " Origin " << Entry.first << " contains no loans\n"; + for (const LoanID &LID : Entry.second) + OS << " Origin " << Entry.first << " contains Loan " << LID << "\n"; + } + } +}; + +// ========================================================================= // +// The Transfer Function +// ========================================================================= // +class Transferer { + FactManager &AllFacts; + LifetimeFactory &Factory; + +public: + explicit Transferer(FactManager &F, LifetimeFactory &Factory) + : AllFacts(F), Factory(Factory) {} + + /// Computes the exit state of a block by applying all its facts sequentially + /// to a given entry state. + /// TODO: We might need to store intermediate states per-fact in the block for + /// later analysis. + LifetimeLattice transferBlock(const CFGBlock *Block, + LifetimeLattice EntryState) { + LifetimeLattice BlockState = EntryState; + llvm::ArrayRef<const Fact *> Facts = AllFacts.getFacts(Block); + + for (const Fact *F : Facts) { + BlockState = transferFact(BlockState, F); + } + return BlockState; + } + +private: + LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) { + switch (F->getKind()) { + case Fact::Kind::Issue: + return transfer(In, *F->getAs<IssueFact>()); + case Fact::Kind::AssignOrigin: + return transfer(In, *F->getAs<AssignOriginFact>()); + // Expire and ReturnOfOrigin facts don't modify the Origins and the State. + case Fact::Kind::Expire: + case Fact::Kind::ReturnOfOrigin: + return In; + } + llvm_unreachable("Unknown fact kind"); + } + + /// A new loan is issued to the origin. Old loans are erased. + LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) { + OriginID OID = F.getOriginID(); + LoanID LID = F.getLoanID(); + return LifetimeLattice( + Factory.OriginMapFact.add(In.Origins, OID, Factory.createLoanSet(LID))); + } + + /// The destination origin's loan set is replaced by the source's. + /// This implicitly "resets" the old loans of the destination. + LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) { + OriginID DestOID = F.getDestOriginID(); + OriginID SrcOID = F.getSrcOriginID(); + LoanSet SrcLoans = InState.getLoans(SrcOID, Factory); + return LifetimeLattice( + Factory.OriginMapFact.add(InState.Origins, DestOID, SrcLoans)); + } +}; +// ========================================================================= // +// Dataflow analysis +// ========================================================================= // + +/// Drives the intra-procedural dataflow analysis. +/// +/// Orchestrates the analysis by iterating over the CFG using a worklist +/// algorithm. It computes a fixed point by propagating the LifetimeLattice +/// state through each block until the state no longer changes. +/// TODO: Maybe use the dataflow framework! The framework might need changes +/// to support the current comparison done at block-entry. +class LifetimeDataflow { + const CFG &Cfg; + AnalysisDeclContext &AC; + LifetimeFactory LifetimeFact; + + Transferer Xfer; + + /// Stores the merged analysis state at the entry of each CFG block. + llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockEntryStates; + /// Stores the analysis state at the exit of each CFG block, after the + /// transfer function has been applied. + llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockExitStates; + +public: + LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC) + : Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {} + + void run() { + llvm::TimeTraceScope TimeProfile("Lifetime Dataflow"); + ForwardDataflowWorklist Worklist(Cfg, AC); + const CFGBlock *Entry = &Cfg.getEntry(); + BlockEntryStates[Entry] = LifetimeLattice{}; + Worklist.enqueueBlock(Entry); + while (const CFGBlock *B = Worklist.dequeue()) { + LifetimeLattice EntryState = getEntryState(B); + LifetimeLattice ExitState = Xfer.transferBlock(B, EntryState); + BlockExitStates[B] = ExitState; + + for (const CFGBlock *Successor : B->succs()) { + auto SuccIt = BlockEntryStates.find(Successor); + LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) + ? SuccIt->second + : LifetimeLattice{}; + LifetimeLattice NewSuccEntryState = + OldSuccEntryState.join(ExitState, LifetimeFact); + // Enqueue the successor if its entry state has changed. + // TODO(opt): Consider changing 'join' to report a change if != + // comparison is found expensive. + if (SuccIt == BlockEntryStates.end() || + NewSuccEntryState != OldSuccEntryState) { + BlockEntryStates[Successor] = NewSuccEntryState; + Worklist.enqueueBlock(Successor); + } + } + } + } + + void dump() const { + llvm::dbgs() << "==========================================\n"; + llvm::dbgs() << " Dataflow results:\n"; + llvm::dbgs() << "==========================================\n"; + const CFGBlock &B = Cfg.getExit(); + getExitState(&B).dump(llvm::dbgs()); + } + + LifetimeLattice getEntryState(const CFGBlock *B) const { + auto It = BlockEntryStates.find(B); + if (It != BlockEntryStates.end()) { + return It->second; + } + return LifetimeLattice{}; + } + + LifetimeLattice getExitState(const CFGBlock *B) const { + auto It = BlockExitStates.find(B); + if (It != BlockExitStates.end()) { + return It->second; + } + return LifetimeLattice{}; + } +}; + +// ========================================================================= // +// TODO: Analysing dataflow results and error reporting. // ========================================================================= // } // anonymous namespace @@ -504,5 +744,18 @@ void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, FactGenerator FactGen(FactMgr, AC); FactGen.run(); DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC)); + + /// TODO(opt): Consider optimizing individual blocks before running the + /// dataflow analysis. + /// 1. Expression Origins: These are assigned once and read at most once, + /// forming simple chains. These chains can be compressed into a single + /// assignment. + /// 2. Block-Local Loans: Origins of expressions are never read by other + /// blocks; only Decls are visible. Therefore, loans in a block that + /// never reach an Origin associated with a Decl can be safely dropped by + /// the analysis. + LifetimeDataflow Dataflow(Cfg, FactMgr, AC); + Dataflow.run(); + DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump()); } } // namespace clang diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp index 4ca094a4393b0..8f4547e77681a 100644 --- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp @@ -18,6 +18,10 @@ MyObj* return_local_addr() { // CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]]) // CHECK: Expire (LoanID: [[L_X]]) } +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]] +// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]] // Pointer Assignment and Return @@ -42,6 +46,14 @@ MyObj* assign_and_return_local_addr() { // CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]]) // CHECK: Expire (LoanID: [[L_Y]]) } +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR1_RVAL]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR2_RVAL]] contains Loan [[L_Y]] +// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] contains Loan [[L_Y]] // Return of Non-Pointer Type @@ -52,6 +64,8 @@ int return_int_val() { return x; } // CHECK-NEXT: End of Block +// CHECK: Dataflow results: +// CHECK: <empty> // Loan Expiration (Automatic Variable, C++) @@ -64,6 +78,9 @@ void loan_expires_cpp() { // CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]]) // CHECK: Expire (LoanID: [[L_OBJ]]) } +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]] +// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]] // FIXME: No expire for Trivial Destructors @@ -78,6 +95,9 @@ void loan_expires_trivial() { // CHECK-NEXT: End of Block // FIXME: Add check for Expire once trivial destructors are handled for expiration. } +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]] +// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]] // CHECK-LABEL: Function: conditional @@ -98,6 +118,66 @@ void conditional(bool condition) { // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]]) // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]]) } +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]] +// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]] +// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_A]] +// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_B]] + + +// CHECK-LABEL: Function: pointers_in_a_cycle +void pointers_in_a_cycle(bool condition) { + MyObj v1{1}; + MyObj v2{1}; + MyObj v3{1}; + + MyObj* p1 = &v1; + MyObj* p2 = &v2; + MyObj* p3 = &v3; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_V1:[0-9]+]], OriginID: [[O_ADDR_V1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P1:[0-9]+]], SrcID: [[O_ADDR_V1]]) +// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], OriginID: [[O_ADDR_V2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P2:[0-9]+]], SrcID: [[O_ADDR_V2]]) +// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], OriginID: [[O_ADDR_V3:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P3:[0-9]+]], SrcID: [[O_ADDR_V3]]) + + while (condition) { + MyObj* temp = p1; + p1 = p2; + p2 = p3; + p3 = temp; +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (DestID: [[O_P1_RVAL:[0-9]+]], SrcID: [[O_P1]]) +// CHECK: AssignOrigin (DestID: [[O_TEMP:[0-9]+]], SrcID: [[O_P1_RVAL]]) +// CHECK: AssignOrigin (DestID: [[O_P2_RVAL:[0-9]+]], SrcID: [[O_P2]]) +// CHECK: AssignOrigin (DestID: [[O_P1]], SrcID: [[O_P2_RVAL]]) +// CHECK: AssignOrigin (DestID: [[O_P3_RVAL:[0-9]+]], SrcID: [[O_P3]]) +// CHECK: AssignOrigin (DestID: [[O_P2]], SrcID: [[O_P3_RVAL]]) +// CHECK: AssignOrigin (DestID: [[O_TEMP_RVAL:[0-9]+]], SrcID: [[O_TEMP]]) +// CHECK: AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]]) + } +} +// At the end of the analysis, the origins for the pointers involved in the cycle +// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point. +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V3]] +// CHECK-DAG: Origin [[O_ADDR_V1]] contains Loan [[L_V1]] +// CHECK-DAG: Origin [[O_ADDR_V2]] contains Loan [[L_V2]] +// CHECK-DAG: Origin [[O_ADDR_V3]] contains Loan [[L_V3]] // CHECK-LABEL: Function: overwrite_origin @@ -114,6 +194,9 @@ void overwrite_origin() { // CHECK: Expire (LoanID: [[L_S2]]) // CHECK: Expire (LoanID: [[L_S1]]) } +// CHECK: Dataflow results: +// CHECK: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]] // CHECK-LABEL: Function: reassign_to_null @@ -129,6 +212,8 @@ void reassign_to_null() { } // FIXME: Have a better representation for nullptr than just an empty origin. // It should be a separate loan and origin kind. +// CHECK: Dataflow results: +// CHECK: Origin [[O_P]] contains no loans // CHECK-LABEL: Function: reassign_in_if @@ -149,6 +234,102 @@ void reassign_in_if(bool condition) { // CHECK: Expire (LoanID: [[L_S2]]) // CHECK: Expire (LoanID: [[L_S1]]) } +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] + + +// CHECK-LABEL: Function: assign_in_switch +void assign_in_switch(int mode) { + MyObj s1; + MyObj s2; + MyObj s3; + MyObj* p = nullptr; +// CHECK: Block B{{[0-9]+}}: +// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) + switch (mode) { + case 1: + p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S1]]) + break; + case 2: + p = &s2; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + break; + default: + p = &s3; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], OriginID: [[O_ADDR_S3:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S3]]) + break; + } +// CHECK: Block B{{[0-9]+}}: +// CHECK-DAG: Expire (LoanID: [[L_S3]]) +// CHECK-DAG: Expire (LoanID: [[L_S2]]) +// CHECK-DAG: Expire (LoanID: [[L_S1]]) +} +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]] +// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_ADDR_S3]] contains Loan [[L_S3]] + + +// CHECK-LABEL: Function: loan_in_loop +void loan_in_loop(bool condition) { + MyObj* p = nullptr; + // CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]]) + // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]]) + while (condition) { + MyObj inner; + p = &inner; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]]) +// CHECK: Expire (LoanID: [[L_INNER]]) + } +} +// CHECK: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] +// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] + + +// CHECK-LABEL: Function: loop_with_break +void loop_with_break(int count) { + MyObj s1; + MyObj s2; + MyObj* p = &s1; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]]) + for (int i = 0; i < count; ++i) { + if (i == 5) { + p = &s2; +// CHECK: Block B{{[0-9]+}}: +// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]]) +// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]]) + break; + } + } +// CHECK: Block B{{[0-9]+}}: +// CHECK: Expire (LoanID: [[L_S2]]) +// CHECK: Expire (LoanID: [[L_S1]]) +} + +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]] +// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]] +// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]] // CHECK-LABEL: Function: nested_scopes @@ -173,6 +354,11 @@ void nested_scopes() { // CHECK: Expire (LoanID: [[L_OUTER]]) } +// CHECK-LABEL: Dataflow results: +// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]] +// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]] +// CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]] + // CHECK-LABEL: Function: pointer_indirection void pointer_indirection() { >From 293b67791504aaaebd8a0cb40a1206c25cacefe8 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 7 Jul 2025 10:31:14 +0200 Subject: [PATCH 2/6] Remove braces. Co-authored-by: Baranov Victor <bar.victor.2...@gmail.com> --- clang/lib/Analysis/LifetimeSafety.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 7870352f0287a..fea84ba88bff1 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -139,9 +139,8 @@ class OriginManager { OriginID get(const Expr &E) { // Origin of DeclRefExpr is that of the declaration it refers to. - if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) { + if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) return get(*DRE->getDecl()); - } auto It = ExprToOriginID.find(&E); // TODO: This should be an assert(It != ExprToOriginID.end()). The current // implementation falls back to getOrCreate to avoid crashing on >From 4266ae866bdd2dd5f5d37ea7831b02abf3f7a83f Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 7 Jul 2025 10:32:47 +0200 Subject: [PATCH 3/6] Apply suggestions: Remove braces Co-authored-by: Baranov Victor <bar.victor.2...@gmail.com> --- clang/lib/Analysis/LifetimeSafety.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index fea84ba88bff1..7c11f8612c87e 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -310,9 +310,8 @@ class FactManager { } void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) { - if (!NewFacts.empty()) { + if (!NewFacts.empty()) BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end()); - } } template <typename FactType, typename... Args> @@ -325,10 +324,9 @@ class FactManager { llvm::dbgs() << "==========================================\n"; llvm::dbgs() << " Lifetime Analysis Facts:\n"; llvm::dbgs() << "==========================================\n"; - if (const Decl *D = AC.getDecl()) { + if (const Decl *D = AC.getDecl()) if (const auto *ND = dyn_cast<NamedDecl>(D)) llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n"; - } // Print blocks in the order as they appear in code for a stable ordering. for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) { llvm::dbgs() << " Block B" << B->getBlockID() << ":\n"; @@ -476,11 +474,9 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { const AccessPath &LoanPath = L.Path; // Check if the loan is for a stack variable and if that variable // is the one being destructed. - if (LoanPath.PathKind == AccessPath::Kind::StackVariable) { - if (LoanPath.D == DestructedVD) { + if (LoanPath.PathKind == AccessPath::Kind::StackVariable) + if (LoanPath.D == DestructedVD) CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID)); - } - } } } >From 7f472822af1c4644808649869c9bc1b9e0907af9 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 7 Jul 2025 08:33:18 +0000 Subject: [PATCH 4/6] address comments --- clang/include/clang/Analysis/Analyses/LifetimeSafety.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index daf24fff72b9b..85b7b5ec7b921 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -1,5 +1,5 @@ -#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H -#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H +#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H #include "clang/AST/DeclBase.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" @@ -10,4 +10,4 @@ void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, } // namespace clang -#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H +#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H >From a78bd02a087dc90477bbd8815e525a976fed7d5a Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 7 Jul 2025 08:43:39 +0000 Subject: [PATCH 5/6] add licence for the new files --- .../clang/Analysis/Analyses/LifetimeSafety.h | 17 +++++++++++++++++ clang/lib/Analysis/LifetimeSafety.cpp | 11 ++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index 85b7b5ec7b921..912ca11af3f0a 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -1,3 +1,20 @@ +//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the entry point for a dataflow-based static analysis +// that checks for C++ lifetime violations. +// +// The analysis is based on the concepts of "origins" and "loans" to track +// pointer lifetimes and detect issues like use-after-free and dangling +// pointers. See the RFC for more details: +// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291 +// +//===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H #define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H #include "clang/AST/DeclBase.h" diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 7c11f8612c87e..7e30af3a1955c 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -1,14 +1,19 @@ +//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/LifetimeSafety.h" #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/StmtVisitor.h" #include "clang/AST/Type.h" +#include "clang/Analysis/Analyses/PostOrderCFGView.h" #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CFG.h" -#include "clang/Analysis/FlowSensitive/DataflowWorklist.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ImmutableMap.h" -#include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" >From 9ba1cd258e63889fa6848d0d0624659b92c6944d Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena <u...@google.com> Date: Mon, 7 Jul 2025 11:07:01 +0000 Subject: [PATCH 6/6] remove AccessPath::Kind --- .../clang/Analysis/Analyses/LifetimeSafety.h | 4 +- clang/lib/Analysis/LifetimeSafety.cpp | 39 +++++++------------ clang/lib/Sema/AnalysisBasedWarnings.cpp | 8 ++-- 3 files changed, 21 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h index 912ca11af3f0a..9998702a41cab 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h @@ -22,8 +22,8 @@ #include "clang/Analysis/CFG.h" namespace clang { -void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, - AnalysisDeclContext &AC); +void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, + AnalysisDeclContext &AC); } // namespace clang diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 7e30af3a1955c..cdbab31ac7a9c 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -25,21 +25,11 @@ namespace { /// Represents the storage location being borrowed, e.g., a specific stack /// variable. +/// TODO: Model access paths of other types, e.g., s.field, heap and globals. struct AccessPath { const clang::ValueDecl *D; - enum class Kind : uint8_t { - StackVariable, - Temporary, // TODO: Handle. - Field, // TODO: Handle like `s.y`. - Heap, // TODO: Handle. - ArrayElement, // TODO: Handle. - Static, // TODO: Handle. - }; - - Kind PathKind; - - AccessPath(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {} + AccessPath(const clang::ValueDecl *D) : D(D) {} }; /// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type. @@ -399,11 +389,11 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { if (!hasOrigin(ICE->getType())) return; Visit(ICE->getSubExpr()); - /// TODO: Consider if this is actually useful in practice. Alternatively, we - /// could directly use the sub-expression's OriginID instead of creating a - /// new one. // An ImplicitCastExpr node itself gets an origin, which flows from the // origin of its sub-expression (after stripping its own parens/casts). + // TODO: Consider if this is actually useful in practice. Alternatively, we + // could directly use the sub-expression's OriginID instead of creating a + // new one. addAssignOriginFact(*ICE, *ICE->getSubExpr()); } @@ -415,9 +405,9 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { // Check if it's a local variable. if (VD->hasLocalStorage()) { OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO); - AccessPath AddrOfLocalVarPath(VD, AccessPath::Kind::StackVariable); - Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, - UO->getOperatorLoc()); + AccessPath AddrOfLocalVarPath(VD); + const Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath, + UO->getOperatorLoc()); CurrentBlockFacts.push_back( FactMgr.createFact<IssueFact>(L.ID, OID)); } @@ -469,6 +459,8 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { /// TODO: Also handle trivial destructors (e.g., for `int` /// variables) which will never have a CFGAutomaticObjDtor node. /// TODO: Handle loans to temporaries. + /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the + /// lifetime ends. const VarDecl *DestructedVD = DtorOpt.getVarDecl(); if (!DestructedVD) return; @@ -479,9 +471,8 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> { const AccessPath &LoanPath = L.Path; // Check if the loan is for a stack variable and if that variable // is the one being destructed. - if (LoanPath.PathKind == AccessPath::Kind::StackVariable) - if (LoanPath.D == DestructedVD) - CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID)); + if (LoanPath.D == DestructedVD) + CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID)); } } @@ -735,9 +726,9 @@ class LifetimeDataflow { // ========================================================================= // } // anonymous namespace -void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg, - AnalysisDeclContext &AC) { - llvm::TimeTraceScope TimeProfile("Lifetime Analysis"); +void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg, + AnalysisDeclContext &AC) { + llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis"); DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(), /*ShowColors=*/true)); FactManager FactMgr; diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index e16412e92d985..4cce77e384ad7 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -2746,6 +2746,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( .setAlwaysAdd(Stmt::UnaryOperatorClass); } + bool EnableLifetimeSafetyAnalysis = !Diags.isIgnored( + diag::warn_experimental_lifetime_safety_dummy_warning, D->getBeginLoc()); // Install the logical handler. std::optional<LogicalErrorHandler> LEH; if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) { @@ -2870,11 +2872,9 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings( // TODO: Enable lifetime safety analysis for other languages once it is // stable. - if (!Diags.isIgnored(diag::warn_experimental_lifetime_safety_dummy_warning, - D->getBeginLoc()) && - S.getLangOpts().CPlusPlus) { + if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) { if (CFG *cfg = AC.getCFG()) - runLifetimeAnalysis(*cast<DeclContext>(D), *cfg, AC); + runLifetimeSafetyAnalysis(*cast<DeclContext>(D), *cfg, AC); } // Check for violations of "called once" parameter properties. if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus && _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits