[clang] Introduce intra-procedural lifetime analysis in Clang (PR #142313)
Yitzhak Mandelbaum via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 10 13:46:19 PDT 2025
================
@@ -0,0 +1,728 @@
+#include "clang/Analysis/Analyses/LifetimeSafety.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <vector>
+
+namespace clang {
+namespace {
+
+struct Point {
+ const clang::CFGBlock *Block;
+ /// Index into Block->Elements().
+ unsigned ElementIndex;
+
+ Point(const clang::CFGBlock *B = nullptr, unsigned Idx = 0)
+ : Block(B), ElementIndex(Idx) {}
+
+ bool operator==(const Point &Other) const {
+ return Block == Other.Block && ElementIndex == Other.ElementIndex;
+ }
+};
+
+/// Represents the storage location being borrowed, e.g., a specific stack
+/// variable.
+/// TODO: Handle member accesseslike `s.y`.
+struct Path {
+ const clang::ValueDecl *D;
+
+ enum class Kind : uint8_t {
+ StackVariable,
+ Heap, // TODO: Handle.
+ Field, // TODO: Handle.
+ ArrayElement, // TODO: Handle.
+ TemporaryObject, // TODO: Handle.
+ StaticOrGlobal, // TODO: Handle.
+ };
+
+ Kind PathKind;
+
+ Path(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {}
+};
+
+using LoanID = uint32_t;
+using OriginID = uint32_t;
+
+/// Information about a single borrow, or "Loan". A loan is created when a
+/// reference or pointer is taken.
+struct LoanInfo {
+ /// TODO: Represent opaque loans.
+ /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it
+ /// is represented as empty LoanSet
+ LoanID ID;
+ Path SourcePath;
+ SourceLocation IssueLoc;
+
+ LoanInfo(LoanID id, Path path, SourceLocation loc)
+ : ID(id), SourcePath(path), IssueLoc(loc) {}
+};
+
+enum class OriginKind : uint8_t { Variable, ExpressionResult };
+
+/// An Origin is a symbolic identifier that represents the set of possible
+/// loans a pointer-like object could hold at any given time.
+/// TODO: Also represent Origins of complex types (fields, inner types).
+struct OriginInfo {
+ OriginID ID;
+ OriginKind Kind;
+ union {
+ const clang::ValueDecl *Decl;
+ const clang::Expr *Expression;
+ };
+ OriginInfo(OriginID id, OriginKind kind, const clang::ValueDecl *D)
+ : ID(id), Kind(kind), Decl(D) {}
+ OriginInfo(OriginID id, OriginKind kind, const clang::Expr *E)
+ : ID(id), Kind(kind), Expression(E) {}
+};
+
+class LoanManager {
+public:
+ LoanManager() = default;
+
+ LoanInfo &addLoanInfo(Path path, SourceLocation loc) {
+ NextLoanIDVal++;
+ AllLoans.emplace_back(NextLoanIDVal, path, loc);
+ return AllLoans.back();
+ }
+
+ const LoanInfo *getLoanInfo(LoanID id) const {
+ if (id < AllLoans.size())
+ return &AllLoans[id];
+ return nullptr;
+ }
+ llvm::ArrayRef<LoanInfo> getLoanInfos() const { return AllLoans; }
+
+private:
+ LoanID NextLoanIDVal = 0;
+ llvm::SmallVector<LoanInfo> AllLoans;
+};
+
+class OriginManager {
+public:
+ OriginManager() = default;
+
+ OriginID getNextOriginID() { return NextOriginIDVal++; }
+ OriginInfo &addOriginInfo(OriginID id, const clang::ValueDecl *D) {
+ assert(D != nullptr);
+ AllOrigins.emplace_back(id, OriginKind::Variable, D);
+ return AllOrigins.back();
+ }
+ OriginInfo &addOriginInfo(OriginID id, const clang::Expr *E) {
+ assert(E != nullptr);
+ AllOrigins.emplace_back(id, OriginKind::ExpressionResult, E);
+ return AllOrigins.back();
+ }
+
+ OriginID getOrCreate(const Expr *E) {
+ auto It = ExprToOriginID.find(E);
+ if (It != ExprToOriginID.end())
+ return It->second;
+
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+ // Origin of DeclRefExpr is that of the declaration it refers to.
+ return getOrCreate(DRE->getDecl());
+ }
+ OriginID NewID = getNextOriginID();
+ addOriginInfo(NewID, E);
+ ExprToOriginID[E] = NewID;
+ return NewID;
+ }
+
+ const OriginInfo *getOriginInfo(OriginID id) const {
+ if (id < AllOrigins.size())
+ return &AllOrigins[id];
+ return nullptr;
+ }
+
+ llvm::ArrayRef<OriginInfo> getOriginInfos() const { return AllOrigins; }
+
+ OriginID getOrCreate(const ValueDecl *D) {
+ auto It = DeclToOriginID.find(D);
+ if (It != DeclToOriginID.end())
+ return It->second;
+ OriginID NewID = getNextOriginID();
+ addOriginInfo(NewID, D);
+ DeclToOriginID[D] = NewID;
+ return NewID;
+ }
+
+private:
+ OriginID NextOriginIDVal = 0;
+ llvm::SmallVector<OriginInfo> AllOrigins;
+ llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID;
+ llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID;
+};
+
+/// An abstract base class for a single, atomic lifetime-relevant event.
+class Fact {
+
+public:
+ enum class Kind : uint8_t {
+ /// A new loan is issued from a borrow expression (e.g., &x).
+ Issue,
+ /// A loan expires as its underlying storage is freed (e.g., variable goes
+ /// out of scope).
+ Expire,
+ /// An origin is propagated from a source to a destination (e.g., p = q).
+ AssignOrigin,
+ /// An origin is part of a function's return value.
+ ReturnOfOrigin
+ };
+
+private:
+ Kind K;
+
+protected:
+ Point P;
+ Fact(Kind K, Point Pt) : K(K), P(Pt) {}
+
+public:
+ virtual ~Fact() = default;
+ Kind getKind() const { return K; }
+ Point getPoint() const { return P; }
+
+ template <typename T> const T *getAs() const {
+ if (T::classof(this))
+ return static_cast<const T *>(this);
+ return nullptr;
+ }
+
+ virtual void dump(llvm::raw_ostream &OS) const {
+ OS << "Fact (Kind: " << static_cast<int>(K) << ", Point: B"
+ << P.Block->getBlockID() << ":" << P.ElementIndex << ")\n";
+ }
+};
+
+class IssueFact : public Fact {
+ LoanID LID;
+ OriginID OID;
+
+public:
+ IssueFact(LoanID LID, OriginID OID, Point Pt)
+ : Fact(Kind::Issue, Pt), LID(LID), OID(OID) {}
+ LoanID getLoanID() const { return LID; }
+ OriginID getOriginID() const { return OID; }
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID()
+ << ")\n";
+ }
+};
+
+class ExpireFact : public Fact {
+ LoanID LID;
+
+public:
+ ExpireFact(LoanID LID, Point Pt) : Fact(Kind::Expire, Pt), LID(LID) {}
+ LoanID getLoanID() const { return LID; }
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "Expire (LoanID: " << getLoanID() << ")\n";
+ }
+};
+
+class AssignOriginFact : public Fact {
+ OriginID OIDDest;
+ OriginID OIDSrc;
+
+public:
+ AssignOriginFact(OriginID OIDDest, OriginID OIDSrc, Point Pt)
+ : Fact(Kind::AssignOrigin, Pt), OIDDest(OIDDest), OIDSrc(OIDSrc) {}
+ OriginID getDestOriginID() const { return OIDDest; }
+ OriginID getSrcOriginID() const { return OIDSrc; }
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::AssignOrigin;
+ }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "AssignOrigin (DestID: " << getDestOriginID()
+ << ", SrcID: " << getSrcOriginID() << ")\n";
+ }
+};
+
+class ReturnOfOriginFact : public Fact {
+ OriginID OID;
+
+public:
+ ReturnOfOriginFact(OriginID OID, Point Pt)
+ : Fact(Kind::ReturnOfOrigin, Pt), OID(OID) {}
+ OriginID getReturnedOriginID() const { return OID; }
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::ReturnOfOrigin;
+ }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n";
+ }
+};
+
+class FactManager {
+public:
+ llvm::ArrayRef<Fact *> getFacts(const CFGBlock *B) const {
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end())
+ return It->second;
+ return {};
+ }
+
+ void addFact(const CFGBlock *B, Fact *NewFact) {
+ BlockToFactsMap[B].push_back(NewFact);
+ }
+
+ template <typename FactType, typename... Args>
+ FactType *createFact(Args &&...args) {
+ void *Mem = FactAllocator.Allocate<FactType>();
+ return new (Mem) FactType(std::forward<Args>(args)...);
+ }
+
+ void dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << " Lifetime Analysis Facts:\n";
+ llvm::dbgs() << "==========================================\n";
+ if (const Decl *D = AC.getDecl()) {
+ if (const auto *ND = dyn_cast<NamedDecl>(D))
+ llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n";
+ }
+ // Print blocks in the order as they appear in code for a stable ordering.
+ ForwardDataflowWorklist worklist(Cfg, AC);
+ for (const CFGBlock *B : Cfg.const_nodes())
+ worklist.enqueueBlock(B);
+ while (const CFGBlock *B = worklist.dequeue()) {
+ llvm::dbgs() << " Block B" << B->getBlockID() << ":\n";
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end()) {
+ for (const Fact *F : It->second) {
+ llvm::dbgs() << " ";
+ F->dump(llvm::dbgs());
+ }
+ }
+ llvm::dbgs() << " End of Block\n";
+ }
+ }
+
+private:
+ llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<Fact *>>
+ BlockToFactsMap;
+ llvm::BumpPtrAllocator FactAllocator;
+};
+
+struct FactsContext {
+ FactManager Facts;
+ LoanManager Loans;
+ OriginManager Origins;
+};
+
+class FactGenerator : public ConstStmtVisitor<FactGenerator> {
+
+public:
+ FactGenerator(const CFG &Cfg, FactsContext &FactsCtx, AnalysisDeclContext &AC)
+ : FactsCtx(FactsCtx), Cfg(Cfg), AC(AC), CurrentBlock(nullptr) {}
+
+ void run() {
+ llvm::TimeTraceScope TimeProfile("Fact Generation");
+ // Iterate through the CFG blocks in pre-order to ensure that
+ // initializations and destructions are processed in the correct sequence.
+ // TODO: A pre-order traversal utility should be provided by Dataflow
+ // framework.
----------------
ymand wrote:
We have reverse post-order; can you expand why pre-order would be preferable? RPO has the advantage of providing a topological sort, so (IIRC) you're guaranteed to visit dominator nodes before any nodes they dominate. simple example: https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings
Pre-order: ABDC
Reverse post-order: ACBD
so, in RPO, C and B are both guaranteed to come before D.
https://github.com/llvm/llvm-project/pull/142313
More information about the cfe-commits
mailing list