[llvm-branch-commits] [clang] [LifetimeSafety] Propagate loans using dataflow analysis (PR #147295)

Utkarsh Saxena via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jul 7 06:28:54 PDT 2025


https://github.com/usx95 created https://github.com/llvm/llvm-project/pull/147295

None

>From 2e4261b02b6230a8c79f01a673cc3030cfff3ea7 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Sun, 6 Jul 2025 19:12:55 +0000
Subject: [PATCH 1/6] [LifetimeSafety] Propagate loans using dataflow analysis

---
 clang/lib/Analysis/LifetimeSafety.cpp         | 255 +++++++++++++++++-
 .../Sema/warn-lifetime-safety-dataflow.cpp    | 186 +++++++++++++
 2 files changed, 440 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index 3fe30e36ebd0f..7870352f0287a 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -491,7 +491,247 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
 };
 
 // ========================================================================= //
-//  TODO: Run dataflow analysis to propagate loans, analyse and error reporting.
+//                              The Dataflow Lattice
+// ========================================================================= //
+
+// Using LLVM's immutable collections is efficient for dataflow analysis
+// as it avoids deep copies during state transitions.
+// TODO(opt): Consider using a bitset to represent the set of loans.
+using LoanSet = llvm::ImmutableSet<LoanID>;
+using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
+
+/// An object to hold the factories for immutable collections, ensuring
+/// that all created states share the same underlying memory management.
+struct LifetimeFactory {
+  OriginLoanMap::Factory OriginMapFact;
+  LoanSet::Factory LoanSetFact;
+
+  LoanSet createLoanSet(LoanID LID) {
+    return LoanSetFact.add(LoanSetFact.getEmptySet(), LID);
+  }
+};
+
+/// LifetimeLattice represents the state of our analysis at a given program
+/// point. It is an immutable object, and all operations produce a new
+/// instance rather than modifying the existing one.
+struct LifetimeLattice {
+  /// The map from an origin to the set of loans it contains.
+  /// TODO(opt): To reduce the lattice size, propagate origins of declarations,
+  /// not expressions, because expressions are not visible across blocks.
+  OriginLoanMap Origins = OriginLoanMap(nullptr);
+
+  explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {}
+  LifetimeLattice() = default;
+
+  bool operator==(const LifetimeLattice &Other) const {
+    return Origins == Other.Origins;
+  }
+  bool operator!=(const LifetimeLattice &Other) const {
+    return !(*this == Other);
+  }
+
+  LoanSet getLoans(OriginID OID, LifetimeFactory &Factory) const {
+    if (auto *Loans = Origins.lookup(OID))
+      return *Loans;
+    return Factory.LoanSetFact.getEmptySet();
+  }
+
+  /// Computes the union of two lattices by performing a key-wise join of
+  /// their OriginLoanMaps.
+  // TODO(opt): This key-wise join is a performance bottleneck. A more
+  // efficient merge could be implemented using a Patricia Trie or HAMT
+  // instead of the current AVL-tree-based ImmutableMap.
+  LifetimeLattice join(const LifetimeLattice &Other,
+                       LifetimeFactory &Factory) const {
+    /// Merge the smaller map into the larger one ensuring we iterate over the
+    /// smaller map.
+    if (Origins.getHeight() < Other.Origins.getHeight())
+      return Other.join(*this, Factory);
+
+    OriginLoanMap JoinedState = Origins;
+    // For each origin in the other map, union its loan set with ours.
+    for (const auto &Entry : Other.Origins) {
+      OriginID OID = Entry.first;
+      LoanSet OtherLoanSet = Entry.second;
+      JoinedState = Factory.OriginMapFact.add(
+          JoinedState, OID,
+          join(getLoans(OID, Factory), OtherLoanSet, Factory));
+    }
+    return LifetimeLattice(JoinedState);
+  }
+
+  LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const {
+    /// Merge the smaller set into the larger one ensuring we iterate over the
+    /// smaller set.
+    if (a.getHeight() < b.getHeight())
+      std::swap(a, b);
+    LoanSet Result = a;
+    for (LoanID LID : b) {
+      /// TODO(opt): Profiling shows that this loop is a major performance
+      /// bottleneck. Investigate using a BitVector to represent the set of
+      /// loans for improved join performance.
+      Result = Factory.LoanSetFact.add(Result, LID);
+    }
+    return Result;
+  }
+
+  void dump(llvm::raw_ostream &OS) const {
+    OS << "LifetimeLattice State:\n";
+    if (Origins.isEmpty())
+      OS << "  <empty>\n";
+    for (const auto &Entry : Origins) {
+      if (Entry.second.isEmpty())
+        OS << "  Origin " << Entry.first << " contains no loans\n";
+      for (const LoanID &LID : Entry.second)
+        OS << "  Origin " << Entry.first << " contains Loan " << LID << "\n";
+    }
+  }
+};
+
+// ========================================================================= //
+//                              The Transfer Function
+// ========================================================================= //
+class Transferer {
+  FactManager &AllFacts;
+  LifetimeFactory &Factory;
+
+public:
+  explicit Transferer(FactManager &F, LifetimeFactory &Factory)
+      : AllFacts(F), Factory(Factory) {}
+
+  /// Computes the exit state of a block by applying all its facts sequentially
+  /// to a given entry state.
+  /// TODO: We might need to store intermediate states per-fact in the block for
+  /// later analysis.
+  LifetimeLattice transferBlock(const CFGBlock *Block,
+                                LifetimeLattice EntryState) {
+    LifetimeLattice BlockState = EntryState;
+    llvm::ArrayRef<const Fact *> Facts = AllFacts.getFacts(Block);
+
+    for (const Fact *F : Facts) {
+      BlockState = transferFact(BlockState, F);
+    }
+    return BlockState;
+  }
+
+private:
+  LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) {
+    switch (F->getKind()) {
+    case Fact::Kind::Issue:
+      return transfer(In, *F->getAs<IssueFact>());
+    case Fact::Kind::AssignOrigin:
+      return transfer(In, *F->getAs<AssignOriginFact>());
+    // Expire and ReturnOfOrigin facts don't modify the Origins and the State.
+    case Fact::Kind::Expire:
+    case Fact::Kind::ReturnOfOrigin:
+      return In;
+    }
+    llvm_unreachable("Unknown fact kind");
+  }
+
+  /// A new loan is issued to the origin. Old loans are erased.
+  LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) {
+    OriginID OID = F.getOriginID();
+    LoanID LID = F.getLoanID();
+    return LifetimeLattice(
+        Factory.OriginMapFact.add(In.Origins, OID, Factory.createLoanSet(LID)));
+  }
+
+  /// The destination origin's loan set is replaced by the source's.
+  /// This implicitly "resets" the old loans of the destination.
+  LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) {
+    OriginID DestOID = F.getDestOriginID();
+    OriginID SrcOID = F.getSrcOriginID();
+    LoanSet SrcLoans = InState.getLoans(SrcOID, Factory);
+    return LifetimeLattice(
+        Factory.OriginMapFact.add(InState.Origins, DestOID, SrcLoans));
+  }
+};
+// ========================================================================= //
+//                              Dataflow analysis
+// ========================================================================= //
+
+/// Drives the intra-procedural dataflow analysis.
+///
+/// Orchestrates the analysis by iterating over the CFG using a worklist
+/// algorithm. It computes a fixed point by propagating the LifetimeLattice
+/// state through each block until the state no longer changes.
+/// TODO: Maybe use the dataflow framework! The framework might need changes
+/// to support the current comparison done at block-entry.
+class LifetimeDataflow {
+  const CFG &Cfg;
+  AnalysisDeclContext &AC;
+  LifetimeFactory LifetimeFact;
+
+  Transferer Xfer;
+
+  /// Stores the merged analysis state at the entry of each CFG block.
+  llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockEntryStates;
+  /// Stores the analysis state at the exit of each CFG block, after the
+  /// transfer function has been applied.
+  llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockExitStates;
+
+public:
+  LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC)
+      : Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {}
+
+  void run() {
+    llvm::TimeTraceScope TimeProfile("Lifetime Dataflow");
+    ForwardDataflowWorklist Worklist(Cfg, AC);
+    const CFGBlock *Entry = &Cfg.getEntry();
+    BlockEntryStates[Entry] = LifetimeLattice{};
+    Worklist.enqueueBlock(Entry);
+    while (const CFGBlock *B = Worklist.dequeue()) {
+      LifetimeLattice EntryState = getEntryState(B);
+      LifetimeLattice ExitState = Xfer.transferBlock(B, EntryState);
+      BlockExitStates[B] = ExitState;
+
+      for (const CFGBlock *Successor : B->succs()) {
+        auto SuccIt = BlockEntryStates.find(Successor);
+        LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end())
+                                                ? SuccIt->second
+                                                : LifetimeLattice{};
+        LifetimeLattice NewSuccEntryState =
+            OldSuccEntryState.join(ExitState, LifetimeFact);
+        // Enqueue the successor if its entry state has changed.
+        // TODO(opt): Consider changing 'join' to report a change if !=
+        // comparison is found expensive.
+        if (SuccIt == BlockEntryStates.end() ||
+            NewSuccEntryState != OldSuccEntryState) {
+          BlockEntryStates[Successor] = NewSuccEntryState;
+          Worklist.enqueueBlock(Successor);
+        }
+      }
+    }
+  }
+
+  void dump() const {
+    llvm::dbgs() << "==========================================\n";
+    llvm::dbgs() << "       Dataflow results:\n";
+    llvm::dbgs() << "==========================================\n";
+    const CFGBlock &B = Cfg.getExit();
+    getExitState(&B).dump(llvm::dbgs());
+  }
+
+  LifetimeLattice getEntryState(const CFGBlock *B) const {
+    auto It = BlockEntryStates.find(B);
+    if (It != BlockEntryStates.end()) {
+      return It->second;
+    }
+    return LifetimeLattice{};
+  }
+
+  LifetimeLattice getExitState(const CFGBlock *B) const {
+    auto It = BlockExitStates.find(B);
+    if (It != BlockExitStates.end()) {
+      return It->second;
+    }
+    return LifetimeLattice{};
+  }
+};
+
+// ========================================================================= //
+//  TODO: Analysing dataflow results and error reporting.
 // ========================================================================= //
 } // anonymous namespace
 
@@ -504,5 +744,18 @@ void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
   FactGenerator FactGen(FactMgr, AC);
   FactGen.run();
   DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC));
+
+  /// TODO(opt): Consider optimizing individual blocks before running the
+  /// dataflow analysis.
+  /// 1. Expression Origins: These are assigned once and read at most once,
+  ///    forming simple chains. These chains can be compressed into a single
+  ///    assignment.
+  /// 2. Block-Local Loans: Origins of expressions are never read by other
+  ///    blocks; only Decls are visible.  Therefore, loans in a block that
+  ///    never reach an Origin associated with a Decl can be safely dropped by
+  ///    the analysis.
+  LifetimeDataflow Dataflow(Cfg, FactMgr, AC);
+  Dataflow.run();
+  DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump());
 }
 } // namespace clang
diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
index 4ca094a4393b0..8f4547e77681a 100644
--- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
@@ -18,6 +18,10 @@ MyObj* return_local_addr() {
 // CHECK:   ReturnOfOrigin (OriginID: [[O_RET_VAL]])
 // CHECK:   Expire (LoanID: [[L_X]])
 }
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]]
+// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]]
 
 
 // Pointer Assignment and Return
@@ -42,6 +46,14 @@ MyObj* assign_and_return_local_addr() {
 // CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]])
 // CHECK: Expire (LoanID: [[L_Y]])
 }
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR1_RVAL]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR2_RVAL]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] contains Loan [[L_Y]]
 
 
 // Return of Non-Pointer Type
@@ -52,6 +64,8 @@ int return_int_val() {
   return x;
 }
 // CHECK-NEXT: End of Block
+// CHECK: Dataflow results:
+// CHECK:  <empty>
 
 
 // Loan Expiration (Automatic Variable, C++)
@@ -64,6 +78,9 @@ void loan_expires_cpp() {
 // CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]])
 // CHECK: Expire (LoanID: [[L_OBJ]])
 }
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]]
+// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]]
 
 
 // FIXME: No expire for Trivial Destructors
@@ -78,6 +95,9 @@ void loan_expires_trivial() {
 // CHECK-NEXT: End of Block
   // FIXME: Add check for Expire once trivial destructors are handled for expiration.
 }
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]]
+// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]]
 
 
 // CHECK-LABEL: Function: conditional
@@ -98,6 +118,66 @@ void conditional(bool condition) {
   // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]])
   // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]])
 }
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]]
+// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]]
+// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_A]]
+// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_B]]
+
+
+// CHECK-LABEL: Function: pointers_in_a_cycle
+void pointers_in_a_cycle(bool condition) {
+  MyObj v1{1};
+  MyObj v2{1};
+  MyObj v3{1};
+
+  MyObj* p1 = &v1;
+  MyObj* p2 = &v2;
+  MyObj* p3 = &v3;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Issue (LoanID: [[L_V1:[0-9]+]], OriginID: [[O_ADDR_V1:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P1:[0-9]+]], SrcID: [[O_ADDR_V1]])
+// CHECK:   Issue (LoanID: [[L_V2:[0-9]+]], OriginID: [[O_ADDR_V2:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P2:[0-9]+]], SrcID: [[O_ADDR_V2]])
+// CHECK:   Issue (LoanID: [[L_V3:[0-9]+]], OriginID: [[O_ADDR_V3:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P3:[0-9]+]], SrcID: [[O_ADDR_V3]])
+
+  while (condition) {
+    MyObj* temp = p1;
+    p1 = p2;
+    p2 = p3;
+    p3 = temp;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   AssignOrigin (DestID: [[O_P1_RVAL:[0-9]+]], SrcID: [[O_P1]])
+// CHECK:   AssignOrigin (DestID: [[O_TEMP:[0-9]+]], SrcID: [[O_P1_RVAL]])
+// CHECK:   AssignOrigin (DestID: [[O_P2_RVAL:[0-9]+]], SrcID: [[O_P2]])
+// CHECK:   AssignOrigin (DestID: [[O_P1]], SrcID: [[O_P2_RVAL]])
+// CHECK:   AssignOrigin (DestID: [[O_P3_RVAL:[0-9]+]], SrcID: [[O_P3]])
+// CHECK:   AssignOrigin (DestID: [[O_P2]], SrcID: [[O_P3_RVAL]])
+// CHECK:   AssignOrigin (DestID: [[O_TEMP_RVAL:[0-9]+]], SrcID: [[O_TEMP]])
+// CHECK:   AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]])
+  }
+}
+// At the end of the analysis, the origins for the pointers involved in the cycle
+// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point.
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_ADDR_V1]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_ADDR_V2]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_ADDR_V3]] contains Loan [[L_V3]]
 
 
 // CHECK-LABEL: Function: overwrite_origin
@@ -114,6 +194,9 @@ void overwrite_origin() {
 // CHECK:   Expire (LoanID: [[L_S2]])
 // CHECK:   Expire (LoanID: [[L_S1]])
 }
+// CHECK: Dataflow results:
+// CHECK:     Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]]
 
 
 // CHECK-LABEL: Function: reassign_to_null
@@ -129,6 +212,8 @@ void reassign_to_null() {
 }
 // FIXME: Have a better representation for nullptr than just an empty origin. 
 //        It should be a separate loan and origin kind.
+// CHECK: Dataflow results:
+// CHECK: Origin [[O_P]] contains no loans
 
 
 // CHECK-LABEL: Function: reassign_in_if
@@ -149,6 +234,102 @@ void reassign_in_if(bool condition) {
 // CHECK:   Expire (LoanID: [[L_S2]])
 // CHECK:   Expire (LoanID: [[L_S1]])
 }
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
+
+
+// CHECK-LABEL: Function: assign_in_switch
+void assign_in_switch(int mode) {
+  MyObj s1;
+  MyObj s2;
+  MyObj s3;
+  MyObj* p = nullptr;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]])
+  switch (mode) {
+    case 1:
+      p = &s1;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S1]])
+      break;
+    case 2:
+      p = &s2;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]])
+      break;
+    default:
+      p = &s3;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Issue (LoanID: [[L_S3:[0-9]+]], OriginID: [[O_ADDR_S3:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S3]])
+      break;
+  }
+// CHECK: Block B{{[0-9]+}}:
+// CHECK-DAG:   Expire (LoanID: [[L_S3]])
+// CHECK-DAG:   Expire (LoanID: [[L_S2]])
+// CHECK-DAG:   Expire (LoanID: [[L_S1]])
+}
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]]
+// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_ADDR_S3]] contains Loan [[L_S3]]
+
+
+// CHECK-LABEL: Function: loan_in_loop
+void loan_in_loop(bool condition) {
+  MyObj* p = nullptr;
+  // CHECK:   AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+  // CHECK:   AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]])
+  while (condition) {
+    MyObj inner;
+    p = &inner;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]])
+// CHECK:   Expire (LoanID: [[L_INNER]])
+  }
+}
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]]
+// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]]
+
+
+// CHECK-LABEL: Function: loop_with_break
+void loop_with_break(int count) {
+  MyObj s1;
+  MyObj s2;
+  MyObj* p = &s1;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]])
+  for (int i = 0; i < count; ++i) {
+    if (i == 5) {
+      p = &s2;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]])
+// CHECK:   AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]])
+      break;
+    }
+  }
+// CHECK: Block B{{[0-9]+}}:
+// CHECK:   Expire (LoanID: [[L_S2]])
+// CHECK:   Expire (LoanID: [[L_S1]])
+}
+
+// CHECK-LABEL: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
 
 
 // CHECK-LABEL: Function: nested_scopes
@@ -173,6 +354,11 @@ void nested_scopes() {
 // CHECK:   Expire (LoanID: [[L_OUTER]])
 }
 
+// CHECK-LABEL: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]]
+// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]]
+// CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]]
+
 
 // CHECK-LABEL: Function: pointer_indirection
 void pointer_indirection() {

>From 293b67791504aaaebd8a0cb40a1206c25cacefe8 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 7 Jul 2025 10:31:14 +0200
Subject: [PATCH 2/6] Remove braces.

Co-authored-by: Baranov Victor <bar.victor.2002 at gmail.com>
---
 clang/lib/Analysis/LifetimeSafety.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index 7870352f0287a..fea84ba88bff1 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -139,9 +139,8 @@ class OriginManager {
 
   OriginID get(const Expr &E) {
     // Origin of DeclRefExpr is that of the declaration it refers to.
-    if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(&E))
       return get(*DRE->getDecl());
-    }
     auto It = ExprToOriginID.find(&E);
     // TODO: This should be an assert(It != ExprToOriginID.end()). The current
     // implementation falls back to getOrCreate to avoid crashing on

>From 4266ae866bdd2dd5f5d37ea7831b02abf3f7a83f Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 7 Jul 2025 10:32:47 +0200
Subject: [PATCH 3/6] Apply suggestions: Remove braces

Co-authored-by: Baranov Victor <bar.victor.2002 at gmail.com>
---
 clang/lib/Analysis/LifetimeSafety.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index fea84ba88bff1..7c11f8612c87e 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -310,9 +310,8 @@ class FactManager {
   }
 
   void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
-    if (!NewFacts.empty()) {
+    if (!NewFacts.empty())
       BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end());
-    }
   }
 
   template <typename FactType, typename... Args>
@@ -325,10 +324,9 @@ class FactManager {
     llvm::dbgs() << "==========================================\n";
     llvm::dbgs() << "       Lifetime Analysis Facts:\n";
     llvm::dbgs() << "==========================================\n";
-    if (const Decl *D = AC.getDecl()) {
+    if (const Decl *D = AC.getDecl())
       if (const auto *ND = dyn_cast<NamedDecl>(D))
         llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n";
-    }
     // Print blocks in the order as they appear in code for a stable ordering.
     for (const CFGBlock *B : *AC.getAnalysis<PostOrderCFGView>()) {
       llvm::dbgs() << "  Block B" << B->getBlockID() << ":\n";
@@ -476,11 +474,9 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
       const AccessPath &LoanPath = L.Path;
       // Check if the loan is for a stack variable and if that variable
       // is the one being destructed.
-      if (LoanPath.PathKind == AccessPath::Kind::StackVariable) {
-        if (LoanPath.D == DestructedVD) {
+      if (LoanPath.PathKind == AccessPath::Kind::StackVariable)
+        if (LoanPath.D == DestructedVD)
           CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID));
-        }
-      }
     }
   }
 

>From 7f472822af1c4644808649869c9bc1b9e0907af9 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 7 Jul 2025 08:33:18 +0000
Subject: [PATCH 4/6] address comments

---
 clang/include/clang/Analysis/Analyses/LifetimeSafety.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
index daf24fff72b9b..85b7b5ec7b921 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
@@ -1,5 +1,5 @@
-#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H
-#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
 #include "clang/AST/DeclBase.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
 #include "clang/Analysis/CFG.h"
@@ -10,4 +10,4 @@ void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
 
 } // namespace clang
 
-#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H

>From a78bd02a087dc90477bbd8815e525a976fed7d5a Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 7 Jul 2025 08:43:39 +0000
Subject: [PATCH 5/6] add licence for the new files

---
 .../clang/Analysis/Analyses/LifetimeSafety.h    | 17 +++++++++++++++++
 clang/lib/Analysis/LifetimeSafety.cpp           | 11 ++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
index 85b7b5ec7b921..912ca11af3f0a 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
@@ -1,3 +1,20 @@
+//===- LifetimeSafety.h - C++ Lifetime Safety Analysis -*----------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the entry point for a dataflow-based static analysis
+// that checks for C++ lifetime violations.
+//
+// The analysis is based on the concepts of "origins" and "loans" to track
+// pointer lifetimes and detect issues like use-after-free and dangling
+// pointers. See the RFC for more details:
+// https://discourse.llvm.org/t/rfc-intra-procedural-lifetime-analysis-in-clang/86291
+//
+//===----------------------------------------------------------------------===//
 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
 #define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
 #include "clang/AST/DeclBase.h"
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index 7c11f8612c87e..7e30af3a1955c 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -1,14 +1,19 @@
+//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
 #include "clang/Analysis/Analyses/LifetimeSafety.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/AST/Type.h"
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
 #include "clang/Analysis/CFG.h"
-#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/ImmutableMap.h"
-#include "llvm/ADT/ImmutableSet.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"

>From 9ba1cd258e63889fa6848d0d0624659b92c6944d Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 7 Jul 2025 11:07:01 +0000
Subject: [PATCH 6/6] remove AccessPath::Kind

---
 .../clang/Analysis/Analyses/LifetimeSafety.h  |  4 +-
 clang/lib/Analysis/LifetimeSafety.cpp         | 39 +++++++------------
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |  8 ++--
 3 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
index 912ca11af3f0a..9998702a41cab 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
@@ -22,8 +22,8 @@
 #include "clang/Analysis/CFG.h"
 namespace clang {
 
-void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
-                         AnalysisDeclContext &AC);
+void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg,
+                               AnalysisDeclContext &AC);
 
 } // namespace clang
 
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index 7e30af3a1955c..cdbab31ac7a9c 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -25,21 +25,11 @@ namespace {
 
 /// Represents the storage location being borrowed, e.g., a specific stack
 /// variable.
+/// TODO: Model access paths of other types, e.g., s.field, heap and globals.
 struct AccessPath {
   const clang::ValueDecl *D;
 
-  enum class Kind : uint8_t {
-    StackVariable,
-    Temporary,    // TODO: Handle.
-    Field,        // TODO: Handle like `s.y`.
-    Heap,         // TODO: Handle.
-    ArrayElement, // TODO: Handle.
-    Static,       // TODO: Handle.
-  };
-
-  Kind PathKind;
-
-  AccessPath(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {}
+  AccessPath(const clang::ValueDecl *D) : D(D) {}
 };
 
 /// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type.
@@ -399,11 +389,11 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
     if (!hasOrigin(ICE->getType()))
       return;
     Visit(ICE->getSubExpr());
-    /// TODO: Consider if this is actually useful in practice. Alternatively, we
-    /// could directly use the sub-expression's OriginID instead of creating a
-    /// new one.
     // An ImplicitCastExpr node itself gets an origin, which flows from the
     // origin of its sub-expression (after stripping its own parens/casts).
+    // TODO: Consider if this is actually useful in practice. Alternatively, we
+    // could directly use the sub-expression's OriginID instead of creating a
+    // new one.
     addAssignOriginFact(*ICE, *ICE->getSubExpr());
   }
 
@@ -415,9 +405,9 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
           // Check if it's a local variable.
           if (VD->hasLocalStorage()) {
             OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO);
-            AccessPath AddrOfLocalVarPath(VD, AccessPath::Kind::StackVariable);
-            Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath,
-                                                   UO->getOperatorLoc());
+            AccessPath AddrOfLocalVarPath(VD);
+            const Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath,
+                                                         UO->getOperatorLoc());
             CurrentBlockFacts.push_back(
                 FactMgr.createFact<IssueFact>(L.ID, OID));
           }
@@ -469,6 +459,8 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
     /// TODO: Also handle trivial destructors (e.g., for `int`
     /// variables) which will never have a CFGAutomaticObjDtor node.
     /// TODO: Handle loans to temporaries.
+    /// TODO: Consider using clang::CFG::BuildOptions::AddLifetime to reuse the
+    /// lifetime ends.
     const VarDecl *DestructedVD = DtorOpt.getVarDecl();
     if (!DestructedVD)
       return;
@@ -479,9 +471,8 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
       const AccessPath &LoanPath = L.Path;
       // Check if the loan is for a stack variable and if that variable
       // is the one being destructed.
-      if (LoanPath.PathKind == AccessPath::Kind::StackVariable)
-        if (LoanPath.D == DestructedVD)
-          CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID));
+      if (LoanPath.D == DestructedVD)
+        CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID));
     }
   }
 
@@ -735,9 +726,9 @@ class LifetimeDataflow {
 // ========================================================================= //
 } // anonymous namespace
 
-void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
-                         AnalysisDeclContext &AC) {
-  llvm::TimeTraceScope TimeProfile("Lifetime Analysis");
+void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg,
+                               AnalysisDeclContext &AC) {
+  llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis");
   DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(),
                                        /*ShowColors=*/true));
   FactManager FactMgr;
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index e16412e92d985..4cce77e384ad7 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2746,6 +2746,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
       .setAlwaysAdd(Stmt::UnaryOperatorClass);
   }
 
+  bool EnableLifetimeSafetyAnalysis = !Diags.isIgnored(
+      diag::warn_experimental_lifetime_safety_dummy_warning, D->getBeginLoc());
   // Install the logical handler.
   std::optional<LogicalErrorHandler> LEH;
   if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) {
@@ -2870,11 +2872,9 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
 
   // TODO: Enable lifetime safety analysis for other languages once it is
   // stable.
-  if (!Diags.isIgnored(diag::warn_experimental_lifetime_safety_dummy_warning,
-                       D->getBeginLoc()) &&
-      S.getLangOpts().CPlusPlus) {
+  if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) {
     if (CFG *cfg = AC.getCFG())
-      runLifetimeAnalysis(*cast<DeclContext>(D), *cfg, AC);
+      runLifetimeSafetyAnalysis(*cast<DeclContext>(D), *cfg, AC);
   }
   // Check for violations of "called once" parameter properties.
   if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus &&



More information about the llvm-branch-commits mailing list