[llvm-branch-commits] [clang] [LifetimeSafety][NFC] Add field-labeled child edges to OriginNode and generalize subtree walks (PR #201510)

Zhijie Wang via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jun 3 23:11:36 PDT 2026


https://github.com/aeft updated https://github.com/llvm/llvm-project/pull/201510

>From 6adf0d0750c8af51f978219295e33465955a3fe0 Mon Sep 17 00:00:00 2001
From: Zhijie Wang <yesterda9 at gmail.com>
Date: Wed, 3 Jun 2026 20:48:56 -0700
Subject: [PATCH] [LifetimeSafety][NFC] Add field-labeled child edges to
 OriginNode and generalize subtree walks

---
 .../Analyses/LifetimeSafety/FactsGenerator.h  |  3 +
 .../Analyses/LifetimeSafety/Origins.h         | 76 +++++++++++++------
 clang/lib/Analysis/LifetimeSafety/Facts.cpp   |  1 +
 .../LifetimeSafety/FactsGenerator.cpp         | 36 ++++++---
 .../Analysis/LifetimeSafety/LiveOrigins.cpp   | 33 +++++---
 clang/lib/Analysis/LifetimeSafety/Origins.cpp |  5 +-
 6 files changed, 108 insertions(+), 46 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
index e08f039691f0e..e97322cf04604 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
@@ -142,6 +142,9 @@ class FactsGenerator : public ConstStmtVisitor<FactsGenerator> {
   void handleUse(const Expr *E);
 
   void markUseAsWrite(const DeclRefExpr *DRE);
+  /// Walks the full subtree so origins on the pointee chain and on field
+  /// children both escape with the returned value.
+  void emitReturnEscapes(OriginNode *N, const Expr *RetExpr);
 
   bool escapesViaReturn(OriginID OID) const;
 
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
index cacefc8aa62ad..d39e402ce9334 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
@@ -66,47 +66,78 @@ struct Origin {
   }
 };
 
-/// A list of origins representing levels of indirection for pointer-like types.
+/// A tree of origins representing the structure of a pointer-like or
+/// record type.
 ///
-/// Each node in the list contains an OriginID representing a level of
-/// indirection. The list structure captures the multi-level nature of
-/// pointer and reference types in the lifetime analysis.
+/// Each node carries an OriginID and is connected to children via labeled
+/// edges: either a pointee edge (one level of pointer/reference indirection)
+/// or a field edge (a named field of a record). Pointer-like types form a
+/// pointee chain; record types fan out via field edges.
 ///
 /// Examples:
-///   - For `int& x`, the list has size 2:
+///   - For `int& x`, the chain has length 2:
 ///     * Outer: origin for the reference storage itself (the lvalue `x`)
 ///     * Inner: origin for what `x` refers to
 ///
-///   - For `int* p`, the list has size 2:
+///   - For `int* p`, the chain has length 2:
 ///     * Outer: origin for the pointer variable `p`
 ///     * Inner: origin for what `p` points to
 ///
-///   - For `View v` (where View is gsl::Pointer), the list has size 2:
+///   - For `View v` (where View is gsl::Pointer), the chain has length 2:
 ///     * Outer: origin for the view object itself
 ///     * Inner: origin for what the view refers to
 ///
-///   - For `int** pp`, the list has size 3:
+///   - For `int** pp`, the chain has length 3:
 ///     * Outer: origin for `pp` itself
 ///     * Inner: origin for `*pp` (what `pp` points to)
 ///     * Inner->Inner: origin for `**pp` (what `*pp` points to)
 ///
-/// The list structure enables the analysis to track how loans flow through
-/// different levels of indirection when assignments and dereferences occur.
-///
-/// TODO: Currently list-shaped (each node has at most one pointee child).
-/// Will become tree-shaped once field children are added to support
-/// origin trees for records whose fields have origins.
+/// The structure enables the analysis to track how loans flow through
+/// levels of indirection and across record fields when assignments and
+/// dereferences occur.
 class OriginNode {
 public:
+  /// A labeled edge from this node to a child. The label distinguishes how
+  /// the child is reached: a null `FD` means a pointee edge (one level of
+  /// pointer/reference indirection); a non-null `FD` means a field edge
+  /// (the named field of a record). Putting the label on the edge lets
+  /// one child node play different roles per parent. For example, the subtree
+  /// for `s`'s `v` field is reached from `s`'s record (FD=v) and from
+  /// the lvalue outer built for the MemberExpr `s.v` (FD=null).
+  struct Edge {
+    const FieldDecl *FD;
+    OriginNode *Child;
+  };
+
   OriginNode(OriginID OID) : OID(OID) {}
 
+  OriginID getOriginID() const { return OID; }
+
+  llvm::ArrayRef<Edge> children() const { return Children; }
+
   OriginNode *getPointeeChild() const {
-    return Children.empty() ? nullptr : Children[0];
+    for (const Edge &E : Children)
+      if (!E.FD)
+        return E.Child;
+    return nullptr;
   }
 
-  OriginID getOriginID() const { return OID; }
+  OriginNode *getFieldChild(const FieldDecl *F) const {
+    assert(F);
+    for (const Edge &E : Children)
+      if (E.FD == F)
+        return E.Child;
+    return nullptr;
+  }
+
+  OriginNode *getFieldChildInChain(const FieldDecl *FD) const {
+    for (const OriginNode *N = this; N; N = N->getPointeeChild())
+      if (OriginNode *Child = N->getFieldChild(FD))
+        return Child;
+    return nullptr;
+  }
 
-  void setChildren(llvm::ArrayRef<OriginNode *> NewChildren) {
+  void setChildren(llvm::ArrayRef<Edge> NewChildren) {
     assert(Children.empty() && "children must be set at most once");
     Children = NewChildren;
   }
@@ -125,7 +156,7 @@ class OriginNode {
 
 private:
   OriginID OID;
-  llvm::ArrayRef<OriginNode *> Children;
+  llvm::ArrayRef<Edge> Children;
 };
 
 bool doesDeclHaveStorage(const ValueDecl *D);
@@ -138,18 +169,19 @@ class OriginManager {
 
   /// Gets or creates the OriginNode for a given ValueDecl.
   ///
-  /// Creates a list structure mirroring the levels of indirection in the
-  /// declaration's type (e.g., `int** p` creates list of size 2).
+  /// Creates a tree structure mirroring the levels of indirection in the
+  /// declaration's type (e.g., `int* p` creates a chain of length 2).
   ///
   /// \returns The OriginNode, or nullptr if the type is not pointer-like.
   OriginNode *getOrCreateNode(const ValueDecl *D);
 
   /// Gets or creates the OriginNode for a given Expr.
   ///
-  /// Creates a list based on the expression's type and value category:
+  /// Creates a tree structure based on the expression's type and value
+  /// category:
   /// - Lvalues get an implicit reference level (modeling addressability)
   /// - Rvalues of non-pointer type return nullptr (no trackable origin)
-  /// - DeclRefExpr may reuse the underlying declaration's list
+  /// - DeclRefExpr may reuse the underlying declaration's tree
   ///
   /// \returns The OriginNode, or nullptr for non-pointer rvalues.
   OriginNode *getOrCreateNode(const Expr *E);
diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
index d04d3e9202952..25458f6ef9b54 100644
--- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
@@ -82,6 +82,7 @@ void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &,
   OS << "Use (";
   size_t NumUsedOrigins = getUsedOrigins()->getLength();
   size_t I = 0;
+  // TODO: pointee-chain only; extend to field children.
   for (const OriginNode *Cur = getUsedOrigins(); Cur;
        Cur = Cur->getPointeeChild(), ++I) {
     OM.dump(Cur->getOriginID(), OS);
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index 3c70cc4a0bef9..f6d556a04672f 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -49,9 +49,13 @@ bool FactsGenerator::hasOrigins(const Expr *E) const {
 /// Propagates origin information from Src to Dst through all levels of
 /// indirection, creating OriginFlowFacts at each level.
 ///
-/// This function enforces a critical type-safety invariant: both lists must
-/// have the same shape (same depth/structure). This invariant ensures that
-/// origins flow only between compatible types during expression evaluation.
+/// This function enforces a critical type-safety invariant: both trees
+/// must have the same pointee-chain depth, and field children are
+/// matched by `FieldDecl`. This invariant ensures that origins flow only
+/// between compatible types during expression evaluation. Field pairs
+/// found on both sides recurse; unmatched fields are skipped, which is
+/// exercised by `CK_DerivedToBase` flows where Base's and Derived's
+/// trees carry distinct direct-field FDs.
 ///
 /// Examples:
 ///   - `int* p = &x;` flows origins from `&x` (depth 1) to `p` (depth 1)
@@ -59,17 +63,24 @@ bool FactsGenerator::hasOrigins(const Expr *E) const {
 ///     * Level 1: pp <- p's address
 ///     * Level 2: (*pp) <- what p points to (i.e., &x)
 ///   - `View v = obj;` flows origins from `obj` (depth 1) to `v` (depth 1)
+///   - `S s2 = s;` flows the top-level origin and recursively flows each
+///     matching `FieldDecl` subtree, so loans on `s.v.inner` propagate to
+///     `s2.v.inner`.
 void FactsGenerator::flow(OriginNode *Dst, OriginNode *Src, bool Kill) {
   if (!Dst)
     return;
   assert(Src &&
          "Dst is non-null but Src is null. List must have the same length");
   assert(Dst->getLength() == Src->getLength() &&
-         "Lists must have the same length");
+         "Pointee chains must have the same length");
 
   while (Dst && Src) {
     CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>(
         Dst->getOriginID(), Src->getOriginID(), Kill));
+    for (const OriginNode::Edge &E : Dst->children())
+      if (E.FD)
+        if (OriginNode *SrcF = Src->getFieldChild(E.FD))
+          flow(E.Child, SrcF, Kill);
     Dst = Dst->getPointeeChild();
     Src = Src->getPointeeChild();
   }
@@ -365,13 +376,18 @@ void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) {
   }
 }
 
+void FactsGenerator::emitReturnEscapes(OriginNode *N, const Expr *RetExpr) {
+  if (!N)
+    return;
+  EscapesInCurrentBlock.push_back(
+      FactMgr.createFact<ReturnEscapeFact>(N->getOriginID(), RetExpr));
+  for (const OriginNode::Edge &E : N->children())
+    emitReturnEscapes(E.Child, RetExpr);
+}
+
 void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) {
-  if (const Expr *RetExpr = RS->getRetValue()) {
-    if (OriginNode *Node = getOriginNode(*RetExpr))
-      for (OriginNode *L = Node; L != nullptr; L = L->getPointeeChild())
-        EscapesInCurrentBlock.push_back(
-            FactMgr.createFact<ReturnEscapeFact>(L->getOriginID(), RetExpr));
-  }
+  if (const Expr *RetExpr = RS->getRetValue())
+    emitReturnEscapes(getOriginNode(*RetExpr), RetExpr);
 }
 
 void FactsGenerator::handleAssignment(const Expr *TargetExpr,
diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
index a23d7f224b4bc..c1f82796a601e 100644
--- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
@@ -127,21 +127,30 @@ class AnalysisImpl
   /// A read operation makes the origin live with definite confidence, as it
   /// dominates this program point. A write operation kills the liveness of
   /// the origin since it overwrites the value.
+  ///
+  /// Walks the full subtree so loans held by any descendant (pointee
+  /// chain or field child) become visible at the use site.
   Lattice transfer(Lattice In, const UseFact &UF) {
+    return transferUseSubtree(In, UF, UF.getUsedOrigins());
+  }
+
+  Lattice transferUseSubtree(Lattice In, const UseFact &UF,
+                             const OriginNode *Cur) {
+    if (!Cur)
+      return In;
+    OriginID OID = Cur->getOriginID();
     Lattice Out = In;
-    for (const OriginNode *Cur = UF.getUsedOrigins(); Cur;
-         Cur = Cur->getPointeeChild()) {
-      OriginID OID = Cur->getOriginID();
-      // Write kills liveness.
-      if (UF.isWritten()) {
-        Out = Lattice(Factory.remove(Out.LiveOrigins, OID));
-      } else {
-        // Read makes origin live with definite confidence (dominates this
-        // point).
-        Out = Lattice(Factory.add(Out.LiveOrigins, OID,
-                                  LivenessInfo(&UF, LivenessKind::Must)));
-      }
+    // Write kills liveness.
+    if (UF.isWritten()) {
+      Out = Lattice(Factory.remove(Out.LiveOrigins, OID));
+    } else {
+      // Read makes origin live with definite confidence (dominates this
+      // point).
+      Out = Lattice(Factory.add(Out.LiveOrigins, OID,
+                                LivenessInfo(&UF, LivenessKind::Must)));
     }
+    for (const OriginNode::Edge &E : Cur->children())
+      Out = transferUseSubtree(Out, UF, E.Child);
     return Out;
   }
 
diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
index 41d3adaaae59f..8c659ea770461 100644
--- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
@@ -193,8 +193,9 @@ OriginNode *OriginManager::createSingleOriginNode(OriginID OID) {
 void OriginManager::attachPointeeChild(OriginNode *Parent,
                                        OriginNode *Pointee) {
   assert(Pointee && "pointee subtree must be non-null");
-  Parent->setChildren(
-      {new (Allocator.Allocate<OriginNode *>()) OriginNode *(Pointee), 1});
+  auto *E = new (Allocator.Allocate<OriginNode::Edge>())
+      OriginNode::Edge{nullptr, Pointee};
+  Parent->setChildren({E, 1});
 }
 
 template <typename T>



More information about the llvm-branch-commits mailing list