[clang] Introduce intra-procedural lifetime analysis in Clang (PR #142313)
Gábor Horváth via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 16 02:03:08 PDT 2025
================
@@ -0,0 +1,753 @@
+#include "clang/Analysis/Analyses/LifetimeSafety.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <cstdint>
+
+namespace clang {
+namespace {
+
+/// Identifies the piece of storage that a loan borrows from, for example one
+/// particular stack variable.
+struct AccessPath {
+  /// Classifies the storage named by the path.
+  enum class Kind : uint8_t {
+    StackVariable,
+    Temporary,    // TODO: Handle.
+    Field,        // TODO: Handle like `s.y`.
+    Heap,         // TODO: Handle.
+    ArrayElement, // TODO: Handle.
+    Static,       // TODO: Handle.
+  };
+
+  /// Declaration associated with the borrowed storage.
+  const clang::ValueDecl *D;
+  /// Category of storage this path refers to.
+  Kind PathKind;
+
+  AccessPath(const clang::ValueDecl *Decl, Kind K) : D(Decl), PathKind(K) {}
+};
+
+/// Strongly typed integer identifier. `Tag` only serves to make the IDs of
+/// different entities (loans, origins) distinct, incompatible types.
+template <typename Tag> struct ID {
+  uint32_t Value = 0;
+
+  bool operator==(const ID<Tag> &RHS) const { return Value == RHS.Value; }
+  bool operator!=(const ID<Tag> &RHS) const { return !operator==(RHS); }
+  bool operator<(const ID<Tag> &RHS) const { return Value < RHS.Value; }
+
+  /// Pre-increment: bumps the numeric value by one and returns this object.
+  ID<Tag> &operator++() {
+    Value += 1;
+    return *this;
+  }
+
+  /// Adds the numeric value of this ID to \p IDBuilder.
+  void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
+    IDBuilder.AddInteger(Value);
+  }
+};
+
+/// Prints the raw numeric value of \p ID to \p OS and returns the stream.
+template <typename Tag>
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) {
+  OS << ID.Value;
+  return OS;
+}
+
+/// Empty tag types; each one instantiates a separate, incompatible ID<> type.
+struct LoanTag {};
+struct OriginTag {};
+
+/// Identifier of a Loan.
+using LoanID = ID<LoanTag>;
+/// Identifier of an Origin.
+using OriginID = ID<OriginTag>;
+
+/// Describes one borrow ("Loan"). A loan comes into existence whenever a
+/// reference or pointer to some storage is taken.
+struct Loan {
+  /// TODO: Represent opaque loans.
+  /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it
+  /// is represented as empty LoanSet
+  LoanID ID;
+  /// The storage that is borrowed.
+  AccessPath Path;
+  /// Source location recorded when the loan was issued.
+  SourceLocation IssueLoc;
+
+  Loan(LoanID LID, AccessPath AP, SourceLocation Loc)
+      : ID(LID), Path(AP), IssueLoc(Loc) {}
+};
+
+/// A symbolic identifier standing for the set of loans that a pointer-like
+/// object may hold at a given program point.
+/// TODO: Also represent Origins of complex types (fields, inner types).
+struct Origin {
+  OriginID ID;
+  /// Either the declaration or the expression this origin was created for.
+  llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr;
+
+  Origin(OriginID OID, const clang::ValueDecl *D) : ID(OID), Ptr(D) {}
+  Origin(OriginID OID, const clang::Expr *E) : ID(OID), Ptr(E) {}
+
+  /// Returns the declaration held in `Ptr`, or null if it holds something else.
+  const clang::ValueDecl *getDecl() const {
+    using DeclPtr = const clang::ValueDecl *;
+    return Ptr.dyn_cast<DeclPtr>();
+  }
+
+  /// Returns the expression held in `Ptr`, or null if it holds something else.
+  const clang::Expr *getExpr() const {
+    using ExprPtr = const clang::Expr *;
+    return Ptr.dyn_cast<ExprPtr>();
+  }
+};
+
+/// Owns every Loan created during the analysis and assigns their IDs.
+///
+/// Invariant: a loan's `ID.Value` is its index in `AllLoans`; `getLoan`
+/// depends on this to look loans up by ID.
+class LoanManager {
+public:
+  LoanManager() = default;
+
+  /// Creates a loan on \p path issued at \p loc, stores it, and returns a
+  /// reference to the stored loan. The reference may be invalidated by a later
+  /// addLoan call, since the backing vector can grow.
+  Loan &addLoan(AccessPath path, SourceLocation loc) {
+    // Take the ID *before* advancing the counter: the first loan must get ID 0
+    // so that ID.Value matches the element's position in AllLoans. Advancing
+    // first would leave every ID one past its index and break getLoan().
+    LoanID NewID = NextLoanID;
+    ++NextLoanID;
+    AllLoans.emplace_back(NewID, path, loc);
+    return AllLoans.back();
+  }
+
+  /// Returns the loan with the given \p id. Asserts that \p id was handed out
+  /// by this manager.
+  const Loan &getLoan(LoanID id) const {
+    assert(id.Value < AllLoans.size());
+    return AllLoans[id.Value];
+  }
+
+  /// Returns all loans in creation order.
+  llvm::ArrayRef<Loan> getLoans() const { return AllLoans; }
+
+private:
+  /// ID that the next call to addLoan will assign.
+  LoanID NextLoanID{0};
+  /// TODO(opt): Profile and evaluate the usefulness of small buffer
+  /// optimisation.
+  llvm::SmallVector<Loan> AllLoans;
+};
+
+/// Creates Origins and maps declarations and expressions to their OriginIDs.
+///
+/// IDs returned by getNextOriginID() start at 0 and grow by one. As long as
+/// every ID is registered through addOrigin() in allocation order (which is
+/// what the getOrCreate() overloads do), an origin's `ID.Value` equals its
+/// index in `AllOrigins`; getOrigin() depends on this.
+class OriginManager {
+public:
+  OriginManager() = default;
+
+  /// Returns a fresh, previously unused OriginID.
+  OriginID getNextOriginID() {
+    // Return the current value and only then advance the counter, so the
+    // first ID is 0 and IDs line up with indices into AllOrigins.
+    // Pre-incrementing would leave every ID one past its index and break
+    // getOrigin().
+    OriginID Fresh = NextOriginID;
+    ++NextOriginID;
+    return Fresh;
+  }
+
+  /// Appends an origin for declaration \p D with the given \p id.
+  Origin &addOrigin(OriginID id, const clang::ValueDecl &D) {
+    AllOrigins.emplace_back(id, &D);
+    return AllOrigins.back();
+  }
+
+  /// Appends an origin for expression \p E with the given \p id.
+  Origin &addOrigin(OriginID id, const clang::Expr &E) {
+    AllOrigins.emplace_back(id, &E);
+    return AllOrigins.back();
+  }
+
+  /// Returns the ID of the origin already associated with \p E. Asserts that
+  /// such an origin exists.
+  OriginID get(const Expr &E) {
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
+      // Origin of DeclRefExpr is that of the declaration it refers to.
+      return get(*DRE->getDecl());
+    }
+    auto It = ExprToOriginID.find(&E);
+    assert(It != ExprToOriginID.end());
+    return It->second;
+  }
+
+  /// Returns the ID of the origin already associated with \p D. Asserts that
+  /// such an origin exists.
+  OriginID get(const ValueDecl &D) {
+    auto It = DeclToOriginID.find(&D);
+    assert(It != DeclToOriginID.end());
+    return It->second;
+  }
+
+  /// Returns the ID of the origin for \p E, creating and registering a new
+  /// origin when none exists yet.
+  OriginID getOrCreate(const Expr &E) {
+    auto It = ExprToOriginID.find(&E);
+    if (It != ExprToOriginID.end())
+      return It->second;
+
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
+      // Origin of DeclRefExpr is that of the declaration it refers to.
+      return getOrCreate(*DRE->getDecl());
+    }
+    OriginID NewID = getNextOriginID();
+    addOrigin(NewID, E);
+    ExprToOriginID[&E] = NewID;
+    return NewID;
+  }
+
+  /// Returns the origin stored at index `ID.Value`. Asserts that the index is
+  /// in range.
+  const Origin &getOrigin(OriginID ID) const {
+    assert(ID.Value < AllOrigins.size());
+    return AllOrigins[ID.Value];
+  }
+
+  /// Returns all origins in creation order.
+  llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; }
+
+  /// Returns the ID of the origin for \p D, creating and registering a new
+  /// origin when none exists yet.
+  OriginID getOrCreate(const ValueDecl &D) {
+    auto It = DeclToOriginID.find(&D);
+    if (It != DeclToOriginID.end())
+      return It->second;
+    OriginID NewID = getNextOriginID();
+    addOrigin(NewID, D);
+    DeclToOriginID[&D] = NewID;
+    return NewID;
+  }
+
+private:
+  /// ID that the next call to getNextOriginID() will return.
+  OriginID NextOriginID{0};
+  /// TODO(opt): Profile and evaluate the usefulness of small buffer
+  /// optimisation.
+  llvm::SmallVector<Origin> AllOrigins;
+  llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID;
+  llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID;
+};
+
+/// Base class for a single, atomic event that is relevant to lifetimes.
+/// Concrete facts derive from it and identify themselves through `Kind`.
+class Fact {
+public:
+  enum class Kind : uint8_t {
+    /// A new loan is issued from a borrow expression (e.g., &x).
+    Issue,
+    /// A loan expires as its underlying storage is freed (e.g., variable goes
+    /// out of scope).
+    Expire,
+    /// An origin is propagated from a source to a destination (e.g., p = q).
+    AssignOrigin,
+    /// An origin is part of a function's return value.
+    ReturnOfOrigin
+  };
+
+  virtual ~Fact() = default;
+
+  /// Returns the discriminator set by the derived class at construction.
+  Kind getKind() const { return K; }
+
+  /// Returns this fact as a `const T *` when `T::classof` accepts it, and
+  /// nullptr otherwise.
+  template <typename T> const T *getAs() const {
+    return T::classof(this) ? static_cast<const T *>(this) : nullptr;
+  }
+
+  /// Prints a generic description; derived classes override this.
+  virtual void dump(llvm::raw_ostream &OS) const {
+    const int KindAsInt = static_cast<int>(K);
+    OS << "Fact (Kind: " << KindAsInt << ")\n";
+  }
+
+protected:
+  /// Only derived classes may construct a Fact.
+  Fact(Kind K) : K(K) {}
+
+private:
+  Kind K;
+};
+
+/// Fact of kind `Issue`: pairs a loan with an origin.
+class IssueFact : public Fact {
+  LoanID LID;
+  OriginID OID;
+
+public:
+  IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {}
+
+  /// True when \p F is an IssueFact.
+  static bool classof(const Fact *F) {
+    const Kind FactKind = F->getKind();
+    return FactKind == Kind::Issue;
+  }
+
+  LoanID getLoanID() const { return LID; }
+  OriginID getOriginID() const { return OID; }
+
+  /// Prints the loan and origin IDs on one line.
+  void dump(llvm::raw_ostream &OS) const override {
+    OS << "Issue (LoanID: " << LID;
+    OS << ", OriginID: " << OID << ")\n";
+  }
+};
+
+/// Fact of kind `Expire`: names the loan that expires.
+class ExpireFact : public Fact {
+  LoanID LID;
+
+public:
+  ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {}
+
+  /// True when \p F is an ExpireFact.
+  static bool classof(const Fact *F) {
+    const Kind FactKind = F->getKind();
+    return FactKind == Kind::Expire;
+  }
+
+  LoanID getLoanID() const { return LID; }
+
+  /// Prints the loan ID on one line.
+  void dump(llvm::raw_ostream &OS) const override {
+    OS << "Expire (LoanID: " << LID << ")\n";
+  }
+};
+
+/// Fact of kind `AssignOrigin`: names a destination and a source origin.
+class AssignOriginFact : public Fact {
+  OriginID OIDDest;
+  OriginID OIDSrc;
+
+public:
+  AssignOriginFact(OriginID OIDDest, OriginID OIDSrc)
+      : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {}
+
+  /// True when \p F is an AssignOriginFact.
+  static bool classof(const Fact *F) {
+    const Kind FactKind = F->getKind();
+    return FactKind == Kind::AssignOrigin;
+  }
+
+  OriginID getDestOriginID() const { return OIDDest; }
+  OriginID getSrcOriginID() const { return OIDSrc; }
+
+  /// Prints the destination and source origin IDs on one line.
+  void dump(llvm::raw_ostream &OS) const override {
+    OS << "AssignOrigin (DestID: " << OIDDest;
+    OS << ", SrcID: " << OIDSrc << ")\n";
+  }
+};
+
+/// Fact of kind `ReturnOfOrigin`: names the origin that is returned.
+class ReturnOfOriginFact : public Fact {
+  OriginID OID;
+
+public:
+  ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {}
+
+  /// True when \p F is a ReturnOfOriginFact.
+  static bool classof(const Fact *F) {
+    const Kind FactKind = F->getKind();
+    return FactKind == Kind::ReturnOfOrigin;
+  }
+
+  OriginID getReturnedOriginID() const { return OID; }
+
+  /// Prints the returned origin ID on one line.
+  void dump(llvm::raw_ostream &OS) const override {
+    OS << "ReturnOfOrigin (OriginID: " << OID << ")\n";
+  }
+};
+
+/// Allocates facts, records which facts belong to which CFG block, and owns
+/// the loan and origin managers.
+class FactManager {
+public:
+  /// Returns the facts recorded for \p B, or an empty range when \p B has no
+  /// entry.
+  llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
+    auto Entry = BlockToFactsMap.find(B);
+    if (Entry == BlockToFactsMap.end())
+      return {};
+    return Entry->second;
+  }
+
+  /// Sets the fact list of \p B to \p NewFacts. Does nothing when \p NewFacts
+  /// is empty, so no map entry is created for blocks without facts.
+  void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
+    if (NewFacts.empty())
+      return;
+    auto &Stored = BlockToFactsMap[B];
+    Stored.assign(NewFacts.begin(), NewFacts.end());
+  }
+
+  /// Constructs a `FactType` from \p args in memory obtained from
+  /// `FactAllocator` and returns a pointer to it.
+  template <typename FactType, typename... Args>
+  FactType *createFact(Args &&...args) {
+    void *Storage = FactAllocator.Allocate<FactType>();
+    FactType *Created = new (Storage) FactType(std::forward<Args>(args)...);
+    return Created;
+  }
+
+  /// Writes all recorded facts, grouped by block, to `llvm::dbgs()`.
+  void dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
+    auto &OS = llvm::dbgs();
+    OS << "==========================================\n";
+    OS << " Lifetime Analysis Facts:\n";
+    OS << "==========================================\n";
+
+    const Decl *D = AC.getDecl();
+    const auto *ND = D ? dyn_cast<NamedDecl>(D) : nullptr;
+    if (ND)
+      OS << "Function: " << ND->getQualifiedNameAsString() << "\n";
+
+    // Print blocks in the order as they appear in code for a stable ordering.
+    ForwardDataflowWorklist Pending(Cfg, AC);
+    for (const CFGBlock *Node : Cfg.const_nodes())
+      Pending.enqueueBlock(Node);
+
+    while (const CFGBlock *Current = Pending.dequeue()) {
+      OS << " Block B" << Current->getBlockID() << ":\n";
+      // getFacts() yields an empty range for blocks without an entry.
+      for (const Fact *F : getFacts(Current)) {
+        OS << " ";
+        F->dump(OS);
+      }
+      OS << " End of Block\n";
+    }
+  }
+
+  LoanManager &getLoanMgr() { return LoanMgr; }
+  OriginManager &getOriginMgr() { return OriginMgr; }
+
+private:
+  LoanManager LoanMgr;
+  OriginManager OriginMgr;
+  /// Facts of each block, in the order they were handed to addBlockFacts().
+  llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>>
+      BlockToFactsMap;
+  /// Backing storage for every fact made by createFact().
+  llvm::BumpPtrAllocator FactAllocator;
+};
+
+class FactGenerator : public ConstStmtVisitor<FactGenerator> {
+
+public:
+ /// Stores the CFG to walk, the manager that receives the generated facts,
+ /// and the analysis context used to build the block worklist.
+ FactGenerator(const CFG &Cfg, FactManager &FactMgr, AnalysisDeclContext &AC)
+ : FactMgr(FactMgr), Cfg(Cfg), AC(AC) {}
+
+ /// Walks every CFG block, generates the facts for its elements, and hands
+ /// each block's facts to the FactManager.
+ void run() {
+   llvm::TimeTraceScope TimeProfile("FactGenerator");
+   // Iterate through the CFG blocks in reverse post-order to ensure that
+   // initializations and destructions are processed in the correct sequence.
+   // TODO: A reverse post-order traversal utility should be provided by
+   // Dataflow framework.
+   ForwardDataflowWorklist Pending(Cfg, AC);
+   for (const CFGBlock *Node : Cfg.const_nodes())
+     Pending.enqueueBlock(Node);
+
+   while (const CFGBlock *Block = Pending.dequeue()) {
+     // Start each block with an empty fact list.
+     CurrentBlockFacts.clear();
+     for (unsigned Idx = 0; Idx < Block->size(); ++Idx) {
+       const CFGElement &Element = Block->Elements[Idx];
+       // Statements go through the visitor.
+       if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>()) {
+         Visit(CS->getStmt());
+         continue;
+       }
+       // Automatic-object destructors are handled separately; every other
+       // element kind is ignored.
+       std::optional<CFGAutomaticObjDtor> DtorOpt =
+           Element.getAs<CFGAutomaticObjDtor>();
+       if (DtorOpt)
+         handleDestructor(*DtorOpt);
+     }
+     FactMgr.addBlockFacts(Block, CurrentBlockFacts);
+   }
+ }
+
+ /// For every variable declared in \p DS whose type has an origin and which
+ /// has an initializer, records an origin assignment from the initializer to
+ /// the variable.
+ void VisitDeclStmt(const DeclStmt *DS) {
+   for (const Decl *D : DS->decls()) {
+     const auto *VD = dyn_cast<VarDecl>(D);
+     if (!VD)
+       continue;
+     if (!hasOrigin(VD->getType()))
+       continue;
+     const Expr *InitExpr = VD->getInit();
+     if (!InitExpr)
+       continue;
+     addAssignOriginFact(*VD, *InitExpr);
+   }
+ }
+
+ void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) {
+ /// TODO: Handle nullptr expr as a special 'null' loan. Uninintialed
----------------
Xazax-hun wrote:
Typo: Uninintialed -> Uninitialized
https://github.com/llvm/llvm-project/pull/142313
More information about the cfe-commits
mailing list