[clang] Introduce intra-procedural lifetime analysis in Clang (PR #142313)
Utkarsh Saxena via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 13 11:15:34 PDT 2025
https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/142313
>From 0cd187b01e61b200d92ca0b640789c1586075142 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Mon, 2 Jun 2025 17:53:14 +0000
Subject: [PATCH 1/2] Introduce Intra-procedural lifetime analysis in Clang
---
.../clang/Analysis/Analyses/LifetimeSafety.h | 13 +
clang/lib/Analysis/CMakeLists.txt | 1 +
clang/lib/Analysis/LifetimeSafety.cpp | 728 ++++++++++++++++++
clang/lib/Sema/AnalysisBasedWarnings.cpp | 8 +
.../Sema/warn-lifetime-safety-dataflow.cpp | 362 +++++++++
5 files changed, 1112 insertions(+)
create mode 100644 clang/include/clang/Analysis/Analyses/LifetimeSafety.h
create mode 100644 clang/lib/Analysis/LifetimeSafety.cpp
create mode 100644 clang/test/Sema/warn-lifetime-safety-dataflow.cpp
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
new file mode 100644
index 0000000000000..daf24fff72b9b
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
@@ -0,0 +1,13 @@
+#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H
+#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H
+#include "clang/AST/DeclBase.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+namespace clang {
+
+void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
+ AnalysisDeclContext &AC);
+
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIME_SAFETY_H
diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt
index 8cd3990db4c3e..0523d92480cb3 100644
--- a/clang/lib/Analysis/CMakeLists.txt
+++ b/clang/lib/Analysis/CMakeLists.txt
@@ -21,6 +21,7 @@ add_clang_library(clangAnalysis
FixitUtil.cpp
IntervalPartition.cpp
IssueHash.cpp
+ LifetimeSafety.cpp
LiveVariables.cpp
MacroExpansionContext.cpp
ObjCNoReturn.cpp
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
new file mode 100644
index 0000000000000..8dbb4dc37026c
--- /dev/null
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -0,0 +1,728 @@
+#include "clang/Analysis/Analyses/LifetimeSafety.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/ADT/ImmutableMap.h"
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <vector>
+
+namespace clang {
+namespace {
+
+struct Point {
+ const clang::CFGBlock *Block;
+ /// Index into Block->Elements().
+ unsigned ElementIndex;
+
+ Point(const clang::CFGBlock *B = nullptr, unsigned Idx = 0)
+ : Block(B), ElementIndex(Idx) {}
+
+ bool operator==(const Point &Other) const {
+ return Block == Other.Block && ElementIndex == Other.ElementIndex;
+ }
+};
+
+/// Represents the storage location being borrowed, e.g., a specific stack
+/// variable.
+/// TODO: Handle member accesseslike `s.y`.
+struct Path {
+ const clang::ValueDecl *D;
+
+ enum class Kind : uint8_t {
+ StackVariable,
+ Heap, // TODO: Handle.
+ Field, // TODO: Handle.
+ ArrayElement, // TODO: Handle.
+ TemporaryObject, // TODO: Handle.
+ StaticOrGlobal, // TODO: Handle.
+ };
+
+ Kind PathKind;
+
+ Path(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {}
+};
+
+using LoanID = uint32_t;
+using OriginID = uint32_t;
+
+/// Information about a single borrow, or "Loan". A loan is created when a
+/// reference or pointer is taken.
+struct LoanInfo {
+ /// TODO: Represent opaque loans.
+ /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it
+ /// is represented as empty LoanSet
+ LoanID ID;
+ Path SourcePath;
+ SourceLocation IssueLoc;
+
+ LoanInfo(LoanID id, Path path, SourceLocation loc)
+ : ID(id), SourcePath(path), IssueLoc(loc) {}
+};
+
+enum class OriginKind : uint8_t { Variable, ExpressionResult };
+
+/// An Origin is a symbolic identifier that represents the set of possible
+/// loans a pointer-like object could hold at any given time.
+/// TODO: Also represent Origins of complex types (fields, inner types).
+struct OriginInfo {
+ OriginID ID;
+ OriginKind Kind;
+ union {
+ const clang::ValueDecl *Decl;
+ const clang::Expr *Expression;
+ };
+ OriginInfo(OriginID id, OriginKind kind, const clang::ValueDecl *D)
+ : ID(id), Kind(kind), Decl(D) {}
+ OriginInfo(OriginID id, OriginKind kind, const clang::Expr *E)
+ : ID(id), Kind(kind), Expression(E) {}
+};
+
+class LoanManager {
+public:
+ LoanManager() = default;
+
+ LoanInfo &addLoanInfo(Path path, SourceLocation loc) {
+ NextLoanIDVal++;
+ AllLoans.emplace_back(NextLoanIDVal, path, loc);
+ return AllLoans.back();
+ }
+
+ const LoanInfo *getLoanInfo(LoanID id) const {
+ if (id < AllLoans.size())
+ return &AllLoans[id];
+ return nullptr;
+ }
+ llvm::ArrayRef<LoanInfo> getLoanInfos() const { return AllLoans; }
+
+private:
+ LoanID NextLoanIDVal = 0;
+ llvm::SmallVector<LoanInfo> AllLoans;
+};
+
+class OriginManager {
+public:
+ OriginManager() = default;
+
+ OriginID getNextOriginID() { return NextOriginIDVal++; }
+ OriginInfo &addOriginInfo(OriginID id, const clang::ValueDecl *D) {
+ assert(D != nullptr);
+ AllOrigins.emplace_back(id, OriginKind::Variable, D);
+ return AllOrigins.back();
+ }
+ OriginInfo &addOriginInfo(OriginID id, const clang::Expr *E) {
+ assert(E != nullptr);
+ AllOrigins.emplace_back(id, OriginKind::ExpressionResult, E);
+ return AllOrigins.back();
+ }
+
+ OriginID getOrCreate(const Expr *E) {
+ auto It = ExprToOriginID.find(E);
+ if (It != ExprToOriginID.end())
+ return It->second;
+
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+ // Origin of DeclRefExpr is that of the declaration it refers to.
+ return getOrCreate(DRE->getDecl());
+ }
+ OriginID NewID = getNextOriginID();
+ addOriginInfo(NewID, E);
+ ExprToOriginID[E] = NewID;
+ return NewID;
+ }
+
+ const OriginInfo *getOriginInfo(OriginID id) const {
+ if (id < AllOrigins.size())
+ return &AllOrigins[id];
+ return nullptr;
+ }
+
+ llvm::ArrayRef<OriginInfo> getOriginInfos() const { return AllOrigins; }
+
+ OriginID getOrCreate(const ValueDecl *D) {
+ auto It = DeclToOriginID.find(D);
+ if (It != DeclToOriginID.end())
+ return It->second;
+ OriginID NewID = getNextOriginID();
+ addOriginInfo(NewID, D);
+ DeclToOriginID[D] = NewID;
+ return NewID;
+ }
+
+private:
+ OriginID NextOriginIDVal = 0;
+ llvm::SmallVector<OriginInfo> AllOrigins;
+ llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID;
+ llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID;
+};
+
+/// An abstract base class for a single, atomic lifetime-relevant event.
+class Fact {
+
+public:
+ enum class Kind : uint8_t {
+ /// A new loan is issued from a borrow expression (e.g., &x).
+ Issue,
+ /// A loan expires as its underlying storage is freed (e.g., variable goes
+ /// out of scope).
+ Expire,
+ /// An origin is propagated from a source to a destination (e.g., p = q).
+ AssignOrigin,
+ /// An origin is part of a function's return value.
+ ReturnOfOrigin
+ };
+
+private:
+ Kind K;
+
+protected:
+ Point P;
+ Fact(Kind K, Point Pt) : K(K), P(Pt) {}
+
+public:
+ virtual ~Fact() = default;
+ Kind getKind() const { return K; }
+ Point getPoint() const { return P; }
+
+ template <typename T> const T *getAs() const {
+ if (T::classof(this))
+ return static_cast<const T *>(this);
+ return nullptr;
+ }
+
+ virtual void dump(llvm::raw_ostream &OS) const {
+ OS << "Fact (Kind: " << static_cast<int>(K) << ", Point: B"
+ << P.Block->getBlockID() << ":" << P.ElementIndex << ")\n";
+ }
+};
+
+class IssueFact : public Fact {
+ LoanID LID;
+ OriginID OID;
+
+public:
+ IssueFact(LoanID LID, OriginID OID, Point Pt)
+ : Fact(Kind::Issue, Pt), LID(LID), OID(OID) {}
+ LoanID getLoanID() const { return LID; }
+ OriginID getOriginID() const { return OID; }
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID()
+ << ")\n";
+ }
+};
+
+class ExpireFact : public Fact {
+ LoanID LID;
+
+public:
+ ExpireFact(LoanID LID, Point Pt) : Fact(Kind::Expire, Pt), LID(LID) {}
+ LoanID getLoanID() const { return LID; }
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "Expire (LoanID: " << getLoanID() << ")\n";
+ }
+};
+
+class AssignOriginFact : public Fact {
+ OriginID OIDDest;
+ OriginID OIDSrc;
+
+public:
+ AssignOriginFact(OriginID OIDDest, OriginID OIDSrc, Point Pt)
+ : Fact(Kind::AssignOrigin, Pt), OIDDest(OIDDest), OIDSrc(OIDSrc) {}
+ OriginID getDestOriginID() const { return OIDDest; }
+ OriginID getSrcOriginID() const { return OIDSrc; }
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::AssignOrigin;
+ }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "AssignOrigin (DestID: " << getDestOriginID()
+ << ", SrcID: " << getSrcOriginID() << ")\n";
+ }
+};
+
+class ReturnOfOriginFact : public Fact {
+ OriginID OID;
+
+public:
+ ReturnOfOriginFact(OriginID OID, Point Pt)
+ : Fact(Kind::ReturnOfOrigin, Pt), OID(OID) {}
+ OriginID getReturnedOriginID() const { return OID; }
+ static bool classof(const Fact *F) {
+ return F->getKind() == Kind::ReturnOfOrigin;
+ }
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n";
+ }
+};
+
+class FactManager {
+public:
+ llvm::ArrayRef<Fact *> getFacts(const CFGBlock *B) const {
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end())
+ return It->second;
+ return {};
+ }
+
+ void addFact(const CFGBlock *B, Fact *NewFact) {
+ BlockToFactsMap[B].push_back(NewFact);
+ }
+
+ template <typename FactType, typename... Args>
+ FactType *createFact(Args &&...args) {
+ void *Mem = FactAllocator.Allocate<FactType>();
+ return new (Mem) FactType(std::forward<Args>(args)...);
+ }
+
+ void dump(const CFG &Cfg, AnalysisDeclContext &AC) const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << " Lifetime Analysis Facts:\n";
+ llvm::dbgs() << "==========================================\n";
+ if (const Decl *D = AC.getDecl()) {
+ if (const auto *ND = dyn_cast<NamedDecl>(D))
+ llvm::dbgs() << "Function: " << ND->getQualifiedNameAsString() << "\n";
+ }
+ // Print blocks in the order as they appear in code for a stable ordering.
+ ForwardDataflowWorklist worklist(Cfg, AC);
+ for (const CFGBlock *B : Cfg.const_nodes())
+ worklist.enqueueBlock(B);
+ while (const CFGBlock *B = worklist.dequeue()) {
+ llvm::dbgs() << " Block B" << B->getBlockID() << ":\n";
+ auto It = BlockToFactsMap.find(B);
+ if (It != BlockToFactsMap.end()) {
+ for (const Fact *F : It->second) {
+ llvm::dbgs() << " ";
+ F->dump(llvm::dbgs());
+ }
+ }
+ llvm::dbgs() << " End of Block\n";
+ }
+ }
+
+private:
+ llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<Fact *>>
+ BlockToFactsMap;
+ llvm::BumpPtrAllocator FactAllocator;
+};
+
+struct FactsContext {
+ FactManager Facts;
+ LoanManager Loans;
+ OriginManager Origins;
+};
+
+class FactGenerator : public ConstStmtVisitor<FactGenerator> {
+
+public:
+ FactGenerator(const CFG &Cfg, FactsContext &FactsCtx, AnalysisDeclContext &AC)
+ : FactsCtx(FactsCtx), Cfg(Cfg), AC(AC), CurrentBlock(nullptr) {}
+
+ void run() {
+ llvm::TimeTraceScope TimeProfile("Fact Generation");
+ // Iterate through the CFG blocks in pre-order to ensure that
+ // initializations and destructions are processed in the correct sequence.
+ // TODO: A pre-order traversal utility should be provided by Dataflow
+ // framework.
+ ForwardDataflowWorklist Worklist(Cfg, AC);
+ for (const CFGBlock *B : Cfg.const_nodes())
+ Worklist.enqueueBlock(B);
+ while (const CFGBlock *Block = Worklist.dequeue()) {
+ CurrentBlock = Block;
+ for (unsigned I = 0; I < Block->size(); ++I) {
+ const CFGElement &Element = Block->Elements[I];
+ CurrentPoint = Point(Block, I);
+ if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>())
+ Visit(CS->getStmt());
+ else if (std::optional<CFGAutomaticObjDtor> DtorOpt =
+ Element.getAs<CFGAutomaticObjDtor>())
+ handleDestructor(*DtorOpt);
+ }
+ }
+ }
+
+ void VisitDeclStmt(const DeclStmt *DS) {
+ for (const Decl *D : DS->decls()) {
+ if (const auto *VD = dyn_cast<VarDecl>(D)) {
+ if (hasOrigin(VD->getType())) {
+ if (const Expr *InitExpr = VD->getInit()) {
+ OriginID DestOID = FactsCtx.Origins.getOrCreate(VD);
+ OriginID SrcOID = FactsCtx.Origins.getOrCreate(InitExpr);
+ FactsCtx.Facts.addFact(CurrentBlock,
+ FactsCtx.Facts.createFact<AssignOriginFact>(
+ DestOID, SrcOID, CurrentPoint));
+ }
+ }
+ }
+ }
+ }
+
+ void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
+ if (!hasOrigin(ICE->getType()))
+ return;
+ // An ImplicitCastExpr node itself gets an origin, which flows from the
+ // origin of its sub-expression (after stripping its own parens/casts).
+ if (ICE->getCastKind() == CK_LValueToRValue) {
+ OriginID IceOID = FactsCtx.Origins.getOrCreate(ICE);
+ OriginID SubExprOID = FactsCtx.Origins.getOrCreate(ICE->getSubExpr());
+ FactsCtx.Facts.addFact(CurrentBlock,
+ FactsCtx.Facts.createFact<AssignOriginFact>(
+ IceOID, SubExprOID, CurrentPoint));
+ }
+ }
+
+ void VisitUnaryOperator(const UnaryOperator *UO) {
+ if (UO->getOpcode() == UO_AddrOf) {
+ const Expr *SubExpr = UO->getSubExpr();
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(SubExpr)) {
+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+ // Check if it's a local variable.
+ if (VD->hasLocalStorage()) {
+ OriginID OID = FactsCtx.Origins.getOrCreate(UO);
+ Path AddrOfLocalVarPath(VD, Path::Kind::StackVariable);
+ LoanInfo &Loan = FactsCtx.Loans.addLoanInfo(AddrOfLocalVarPath,
+ UO->getOperatorLoc());
+ FactsCtx.Facts.addFact(CurrentBlock,
+ FactsCtx.Facts.createFact<IssueFact>(
+ Loan.ID, OID, CurrentPoint));
+ }
+ }
+ }
+ }
+ }
+
+ void VisitReturnStmt(const ReturnStmt *RS) {
+ if (const Expr *RetExpr = RS->getRetValue()) {
+ if (hasOrigin(RetExpr->getType())) {
+ OriginID OID = FactsCtx.Origins.getOrCreate(RetExpr);
+ FactsCtx.Facts.addFact(
+ CurrentBlock,
+ FactsCtx.Facts.createFact<ReturnOfOriginFact>(OID, CurrentPoint));
+ }
+ }
+ }
+
+ void VisitBinaryOperator(const BinaryOperator *BO) {
+ if (BO->isAssignmentOp()) {
+ const Expr *LHSExpr = BO->getLHS();
+ const Expr *RHSExpr = BO->getRHS();
+
+ // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`
+ // LHS must be a pointer/reference type that can be an origin.
+ // RHS must also represent an origin (either another pointer/ref or an
+ // address-of).
+ if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) {
+ if (const auto *VD_LHS =
+ dyn_cast<ValueDecl>(DRE_LHS->getDecl()->getCanonicalDecl());
+ VD_LHS && hasOrigin(VD_LHS->getType())) {
+ OriginID DestOID = FactsCtx.Origins.getOrCreate(VD_LHS);
+ OriginID SrcOID = FactsCtx.Origins.getOrCreate(RHSExpr);
+ FactsCtx.Facts.addFact(CurrentBlock,
+ FactsCtx.Facts.createFact<AssignOriginFact>(
+ DestOID, SrcOID, CurrentPoint));
+ }
+ }
+ }
+ }
+
+private:
+ // Check if a type have an origin.
+ bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); }
+
+ void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) {
+ /// TODO: Also handle trivial destructors (e.g., for `int`
+ // variables) which will never have a CFGAutomaticObjDtor node.
+ /// TODO: Handle loans to temporaries.
+ const VarDecl *DestructedVD = DtorOpt.getVarDecl();
+ if (!DestructedVD)
+ return;
+ // Iterate through all loans to see if any expire.
+ for (const LoanInfo &Loan : FactsCtx.Loans.getLoanInfos()) {
+ const Path &LoanPath = Loan.SourcePath;
+ // Check if the loan is for a stack variable and if that variable
+ // is the one being destructed.
+ if (LoanPath.PathKind == Path::Kind::StackVariable) {
+ if (LoanPath.D == DestructedVD) {
+ FactsCtx.Facts.addFact(
+ CurrentBlock,
+ FactsCtx.Facts.createFact<ExpireFact>(Loan.ID, CurrentPoint));
+ }
+ }
+ }
+ }
+
+ FactsContext &FactsCtx;
+ const CFG &Cfg;
+ AnalysisDeclContext &AC;
+ const CFGBlock *CurrentBlock;
+ Point CurrentPoint;
+};
+
+// ========================================================================= //
+// The Dataflow Lattice
+// ========================================================================= //
+
+// Using LLVM's immutable collections is efficient for dataflow analysis
+// as it avoids deep copies during state transitions.
+// TODO(opt): Consider using a bitset to represent the set of loans.
+using LoanSet = llvm::ImmutableSet<LoanID>;
+using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
+
+/// A context object to hold the factories for immutable collections, ensuring
+/// that all created states share the same underlying memory management.
+struct LifetimeFactory {
+ OriginLoanMap::Factory OriginMapFact;
+ LoanSet::Factory LoanSetFact;
+
+ LoanSet createLoanSet(LoanID LID) {
+ return LoanSetFact.add(LoanSetFact.getEmptySet(), LID);
+ }
+};
+
+/// LifetimeLattice represents the state of our analysis at a given program
+/// point. It is an immutable object, and all operations produce a new
+/// instance rather than modifying the existing one.
+struct LifetimeLattice {
+ /// The map from an origin to the set of loans it contains.
+ /// TODO(opt): To reduce the lattice size, propagate origins of declarations,
+ /// not expressions, because expressions are not visible across blocks.
+ OriginLoanMap Origins = OriginLoanMap(nullptr);
+
+ explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {}
+ LifetimeLattice() = default;
+
+ bool operator==(const LifetimeLattice &Other) const {
+ return Origins == Other.Origins;
+ }
+ bool operator!=(const LifetimeLattice &Other) const {
+ return !(*this == Other);
+ }
+
+ LoanSet getLoans(OriginID OID, LifetimeFactory &Factory) const {
+ if (auto *Loans = Origins.lookup(OID))
+ return *Loans;
+ return Factory.LoanSetFact.getEmptySet();
+ }
+
+ /// Computes the union of two lattices by performing a key-wise merge of
+ /// their OriginLoanMaps.
+ LifetimeLattice merge(const LifetimeLattice &Other,
+ LifetimeFactory &Factory) const {
+ /// Merge the smaller map into the larger one ensuring we iterate over the
+ /// smaller map.
+ if (Origins.getHeight() < Other.Origins.getHeight())
+ return Other.merge(*this, Factory);
+
+ OriginLoanMap MergedState = Origins;
+ // For each origin in the other map, union its loan set with ours.
+ for (const auto &Entry : Other.Origins) {
+ OriginID OID = Entry.first;
+ LoanSet OtherLoanSet = Entry.second;
+ MergedState = Factory.OriginMapFact.add(
+ MergedState, OID,
+ merge(getLoans(OID, Factory), OtherLoanSet, Factory));
+ }
+ return LifetimeLattice(MergedState);
+ }
+
+ LoanSet merge(LoanSet a, LoanSet b, LifetimeFactory &Factory) const {
+ /// Merge the smaller set into the larger one ensuring we iterate over the
+ /// smaller set.
+ if (a.getHeight() < b.getHeight())
+ std::swap(a, b);
+ LoanSet Result = a;
+ for (LoanID LID : b) {
+ /// TODO(opt): Profiling shows that this loop is a major performance
+ /// bottleneck. Investigate using a BitVector to represent the set of
+ /// loans for improved merge performance.
+ Result = Factory.LoanSetFact.add(Result, LID);
+ }
+ return Result;
+ }
+
+ void dump(llvm::raw_ostream &OS) const {
+ OS << "LifetimeLattice State:\n";
+ if (Origins.isEmpty())
+ OS << " <empty>\n";
+ for (const auto &Entry : Origins) {
+ if (Entry.second.isEmpty())
+ OS << " Origin " << Entry.first << " contains no loans\n";
+ for (const LoanID &LID : Entry.second)
+ OS << " Origin " << Entry.first << " contains Loan " << LID << "\n";
+ }
+ }
+};
+
+// ========================================================================= //
+// The Transfer Function
+// ========================================================================= //
+class Transferer {
+ FactManager &AllFacts;
+ LifetimeFactory &Factory;
+
+public:
+ explicit Transferer(FactManager &F, LifetimeFactory &Factory)
+ : AllFacts(F), Factory(Factory) {}
+
+ /// Computes the exit state of a block by applying all its facts sequentially
+ /// to a given entry state.
+ /// TODO: We might need to store intermediate states per-fact in the block for
+ /// later analysis.
+ LifetimeLattice transferBlock(const CFGBlock *Block,
+ LifetimeLattice EntryState) {
+ LifetimeLattice BlockState = EntryState;
+ llvm::ArrayRef<Fact *> Facts = AllFacts.getFacts(Block);
+
+ for (const Fact *F : Facts) {
+ BlockState = transferFact(BlockState, F);
+ }
+ return BlockState;
+ }
+
+private:
+ LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) {
+ switch (F->getKind()) {
+ case Fact::Kind::Issue:
+ return transfer(In, *F->getAs<IssueFact>());
+ case Fact::Kind::AssignOrigin:
+ return transfer(In, *F->getAs<AssignOriginFact>());
+ // Expire and ReturnOfOrigin facts don't modify the Origins and the State.
+ case Fact::Kind::Expire:
+ case Fact::Kind::ReturnOfOrigin:
+ return In;
+ }
+ llvm_unreachable("Unknown fact kind");
+ }
+
+ /// A new loan is issued to the origin. Old loans are erased.
+ LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) {
+ OriginID OID = F.getOriginID();
+ LoanID LID = F.getLoanID();
+ return LifetimeLattice(
+ Factory.OriginMapFact.add(In.Origins, OID, Factory.createLoanSet(LID)));
+ }
+
+ /// The destination origin's loan set is replaced by the source's.
+ /// This implicitly "resets" the old loans of the destination.
+ LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) {
+ OriginID DestOID = F.getDestOriginID();
+ OriginID SrcOID = F.getSrcOriginID();
+ LoanSet SrcLoans = InState.getLoans(SrcOID, Factory);
+ return LifetimeLattice(
+ Factory.OriginMapFact.add(InState.Origins, DestOID, SrcLoans));
+ }
+};
+// ========================================================================= //
+// Dataflow analysis
+// ========================================================================= //
+
+/// Drives the intra-procedural dataflow analysis.
+///
+/// Orchestrates the analysis by iterating over the CFG using a worklist
+/// algorithm. It computes a fixed point by propagating the LifetimeLattice
+/// state through each block until the state no longer changes.
+/// TODO: Maybe use the dataflow framework!
+class LifetimeDataflow {
+ const CFG &Cfg;
+ AnalysisDeclContext &AC;
+ LifetimeFactory LifetimeFact;
+
+ Transferer Xfer;
+
+ /// Stores the merged analysis state at the entry of each CFG block.
+ llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockEntryStates;
+ /// Stores the analysis state at the exit of each CFG block, after the
+ /// transfer function has been applied.
+ llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockExitStates;
+
+public:
+ LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC)
+ : Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {}
+
+ void run() {
+ llvm::TimeTraceScope TimeProfile("Lifetime Dataflow");
+ ForwardDataflowWorklist Worklist(Cfg, AC);
+ const CFGBlock *Entry = &Cfg.getEntry();
+ BlockEntryStates[Entry] = LifetimeLattice{};
+ Worklist.enqueueBlock(Entry);
+ while (const CFGBlock *B = Worklist.dequeue()) {
+ LifetimeLattice EntryState = getEntryState(B);
+ LifetimeLattice ExitState = Xfer.transferBlock(B, EntryState);
+ BlockExitStates[B] = ExitState;
+
+ for (const CFGBlock *Successor : B->succs()) {
+ auto SuccIt = BlockEntryStates.find(Successor);
+ LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end())
+ ? SuccIt->second
+ : LifetimeLattice{};
+ LifetimeLattice NewSuccEntryState =
+ OldSuccEntryState.merge(ExitState, LifetimeFact);
+ if (SuccIt == BlockEntryStates.end() ||
+ NewSuccEntryState != OldSuccEntryState) {
+ BlockEntryStates[Successor] = NewSuccEntryState;
+ Worklist.enqueueBlock(Successor);
+ }
+ }
+ }
+ }
+
+ void dump() const {
+ llvm::dbgs() << "==========================================\n";
+ llvm::dbgs() << " Dataflow results:\n";
+ llvm::dbgs() << "==========================================\n";
+ const CFGBlock &B = Cfg.getExit();
+ getExitState(&B).dump(llvm::dbgs());
+ }
+
+ LifetimeLattice getEntryState(const CFGBlock *B) const {
+ auto It = BlockEntryStates.find(B);
+ if (It != BlockEntryStates.end()) {
+ return It->second;
+ }
+ return LifetimeLattice{};
+ }
+
+ LifetimeLattice getExitState(const CFGBlock *B) const {
+ auto It = BlockExitStates.find(B);
+ if (It != BlockExitStates.end()) {
+ return It->second;
+ }
+ return LifetimeLattice{};
+ }
+};
+
+// ========================================================================= //
+// TODO: Analysing dataflow results and error reporting.
+// ========================================================================= //
+} // anonymous namespace
+
+void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
+ AnalysisDeclContext &AC) {
+ llvm::TimeTraceScope TimeProfile("Lifetime Analysis");
+ FactsContext FactsCtx;
+ FactGenerator FactGen(Cfg, FactsCtx, AC);
+ FactGen.run();
+ DEBUG_WITH_TYPE("LifetimeFacts", FactsCtx.Facts.dump(Cfg, AC));
+
+ /// TODO(opt): Consider optimizing individual blocks before running the
+ /// dataflow analysis.
+ /// 1. Expression Origins: These are assigned once and read at most once,
+ /// forming simple chains. These chains can be compressed into a single
+ /// assignment.
+ /// 2. Block-Local Loans: Origins of expressions are never read by other
+ /// blocks; only Decls are visible. Therefore, loans in a block that
+ /// never reach an Origin associated with a Decl can be safely dropped by
+ /// the analysis.
+ LifetimeDataflow Dataflow(Cfg, FactsCtx.Facts, AC);
+ Dataflow.run();
+ DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump());
+}
+} // namespace clang
\ No newline at end of file
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index d95844cfed614..dd4bede775a40 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -29,6 +29,7 @@
#include "clang/Analysis/Analyses/CFGReachabilityAnalysis.h"
#include "clang/Analysis/Analyses/CalledOnceCheck.h"
#include "clang/Analysis/Analyses/Consumed.h"
+#include "clang/Analysis/Analyses/LifetimeSafety.h"
#include "clang/Analysis/Analyses/ReachableCode.h"
#include "clang/Analysis/Analyses/ThreadSafety.h"
#include "clang/Analysis/Analyses/UninitializedValues.h"
@@ -49,6 +50,7 @@
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
#include <algorithm>
#include <deque>
#include <iterator>
@@ -2842,6 +2844,12 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
}
}
+ DEBUG_WITH_TYPE(
+ "ExperimentalLifetimeAnalysis", if (S.getLangOpts().CPlusPlus) {
+ if (CFG *cfg = AC.getCFG()) {
+ runLifetimeAnalysis(*cast<DeclContext>(D), *cfg, AC);
+ }
+ });
// Check for violations of "called once" parameter properties.
if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus &&
shouldAnalyzeCalledOnceParameters(Diags, D->getBeginLoc())) {
diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
new file mode 100644
index 0000000000000..466bb7d9c3cd9
--- /dev/null
+++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
@@ -0,0 +1,362 @@
+// RUN: %clang_cc1 -mllvm -debug-only=ExperimentalLifetimeAnalysis,LifetimeFacts,LifetimeDataflow -Wreturn-stack-address-cfg %s 2>&1 | FileCheck %s
+
+struct MyObj {
+ int id;
+ ~MyObj() {} // Non-trivial destructor
+};
+
+// Simple Local Variable Address and Return
+// CHECK-LABEL: Function: return_local_addr
+MyObj* return_local_addr() {
+ MyObj x {10};
+ MyObj* p = &x;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_X:[0-9]+]], OriginID: [[O_ADDR_X:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_X]])
+ return p;
+// CHECK: AssignOrigin (DestID: [[O_RET_VAL:[0-9]+]], SrcID: [[O_P]])
+// CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]])
+// CHECK: Expire (LoanID: [[L_X]])
+}
+// CHECK: Dataflow results:
+
+// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]]
+// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]]
+
+
+// Pointer Assignment and Return
+// CHECK-LABEL: Function: assign_and_return_local_addr
+// CHECK-NEXT: Block B{{[0-9]+}}:
+MyObj* assign_and_return_local_addr() {
+ MyObj y{20};
+ MyObj* ptr1 = &y;
+// CHECK: Issue (LoanID: [[L_Y:[0-9]+]], OriginID: [[O_ADDR_Y:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_PTR1:[0-9]+]], SrcID: [[O_ADDR_Y]])
+ MyObj* ptr2 = ptr1;
+// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL:[0-9]+]], SrcID: [[O_PTR1]])
+// CHECK: AssignOrigin (DestID: [[O_PTR2:[0-9]+]], SrcID: [[O_PTR1_RVAL]])
+ ptr2 = ptr1;
+// CHECK: AssignOrigin (DestID: [[O_PTR1_RVAL_2:[0-9]+]], SrcID: [[O_PTR1]])
+// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR1_RVAL_2]])
+ ptr2 = ptr2; // Self assignment.
+// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL:[0-9]+]], SrcID: [[O_PTR2]])
+// CHECK: AssignOrigin (DestID: [[O_PTR2]], SrcID: [[O_PTR2_RVAL]])
+ return ptr2;
+// CHECK: AssignOrigin (DestID: [[O_PTR2_RVAL_2:[0-9]+]], SrcID: [[O_PTR2]])
+// CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]])
+// CHECK: Expire (LoanID: [[L_Y]])
+}
+// CHECK: Dataflow results:
+
+// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR1_RVAL]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR2_RVAL]] contains Loan [[L_Y]]
+// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] contains Loan [[L_Y]]
+
+
+// Return of Non-Pointer Type
+// CHECK-LABEL: Function: return_int_val
+// CHECK-NEXT: Block B{{[0-9]+}}:
+int return_int_val() {
+ int x = 10;
+ return x;
+}
+// CHECK-NEXT: End of Block
+// CHECK: Dataflow results:
+
+// CHECK: <empty>
+
+
+// Loan Expiration (Automatic Variable, C++)
+// CHECK-LABEL: Function: loan_expires_cpp
+// CHECK-NEXT: Block B{{[0-9]+}}:
+void loan_expires_cpp() {
+ MyObj obj{1};
+ MyObj* pObj = &obj;
+// CHECK: Issue (LoanID: [[L_OBJ:[0-9]+]], OriginID: [[O_ADDR_OBJ:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]])
+// CHECK: Expire (LoanID: [[L_OBJ]])
+}
+// CHECK: Dataflow results:
+
+// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]]
+// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]]
+
+
+// FIXME: No expire for Trivial Destructors
+// CHECK-LABEL: Function: loan_expires_trivial
+// CHECK-NEXT: Block B{{[0-9]+}}:
+void loan_expires_trivial() {
+ int trivial_obj = 1;
+ int* pTrivialObj = &trivial_obj;
+// CHECK: Issue (LoanID: [[L_TRIVIAL_OBJ:[0-9]+]], OriginID: [[O_ADDR_TRIVIAL_OBJ:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_PTOBJ:[0-9]+]], SrcID: [[O_ADDR_TRIVIAL_OBJ]])
+// CHECK-NOT: Expire (LoanID: [[L_TRIVIAL_OBJ]])
+// CHECK-NEXT: End of Block
+ // FIXME: Add check for Expire once trivial destructors are handled for expiration.
+}
+// CHECK: Dataflow results:
+
+// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]]
+// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]]
+
+
+// CHECK-LABEL: Function: conditional
+void conditional(bool condition) {
+ int a = 5;
+ int b = 10;
+ int* p = nullptr;
+
+ if (condition)
+ p = &a;
+ // CHECK: Issue (LoanID: [[L_A:[0-9]+]], OriginID: [[O_ADDR_A:[0-9]+]])
+ // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_A]])
+ else
+ p = &b;
+ // CHECK: Issue (LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]])
+ // CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]])
+}
+// CHECK: Dataflow results:
+
+// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]]
+// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]]
+
+
+// CHECK-LABEL: Function: pointers_in_a_cycle
+void pointers_in_a_cycle(bool condition) {
+ MyObj v1{1};
+ MyObj v2{1};
+ MyObj v3{1};
+
+ MyObj* p1 = &v1;
+ MyObj* p2 = &v2;
+ MyObj* p3 = &v3;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_V1:[0-9]+]], OriginID: [[O_ADDR_V1:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P1:[0-9]+]], SrcID: [[O_ADDR_V1]])
+// CHECK: Issue (LoanID: [[L_V2:[0-9]+]], OriginID: [[O_ADDR_V2:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P2:[0-9]+]], SrcID: [[O_ADDR_V2]])
+// CHECK: Issue (LoanID: [[L_V3:[0-9]+]], OriginID: [[O_ADDR_V3:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P3:[0-9]+]], SrcID: [[O_ADDR_V3]])
+
+ while (condition) {
+ MyObj* temp = p1;
+ p1 = p2;
+ p2 = p3;
+ p3 = temp;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: AssignOrigin (DestID: [[O_P1_RVAL:[0-9]+]], SrcID: [[O_P1]])
+// CHECK: AssignOrigin (DestID: [[O_TEMP:[0-9]+]], SrcID: [[O_P1_RVAL]])
+// CHECK: AssignOrigin (DestID: [[O_P2_RVAL:[0-9]+]], SrcID: [[O_P2]])
+// CHECK: AssignOrigin (DestID: [[O_P1]], SrcID: [[O_P2_RVAL]])
+// CHECK: AssignOrigin (DestID: [[O_P3_RVAL:[0-9]+]], SrcID: [[O_P3]])
+// CHECK: AssignOrigin (DestID: [[O_P2]], SrcID: [[O_P3_RVAL]])
+// CHECK: AssignOrigin (DestID: [[O_TEMP_RVAL:[0-9]+]], SrcID: [[O_TEMP]])
+// CHECK: AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]])
+ }
+}
+// At the end of the analysis, the origins for the pointers involved in the cycle
+// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point.
+// CHECK: Dataflow results:
+
+// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V3]]
+// CHECK-DAG: Origin [[O_ADDR_V1]] contains Loan [[L_V1]]
+// CHECK-DAG: Origin [[O_ADDR_V2]] contains Loan [[L_V2]]
+// CHECK-DAG: Origin [[O_ADDR_V3]] contains Loan [[L_V3]]
+
+
+// CHECK-LABEL: Function: overwrite_origin
+void overwrite_origin() {
+ MyObj s1;
+ MyObj s2;
+ MyObj* p = &s1;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]])
+ p = &s2;
+// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]])
+// CHECK: Expire (LoanID: [[L_S2]])
+// CHECK: Expire (LoanID: [[L_S1]])
+}
+// CHECK: Dataflow results:
+
+// CHECK: Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]]
+
+
+// CHECK-LABEL: Function: reassign_to_null
+void reassign_to_null() {
+ MyObj s1;
+ MyObj* p = &s1;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]])
+ p = nullptr;
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_NULLPTR:[0-9]+]])
+// CHECK: Expire (LoanID: [[L_S1]])
+}
+// FIXME: Have a better representation for nullptr than just an empty origin.
+// It should be a separate loan and origin kind.
+// CHECK: Dataflow results:
+
+// CHECK: Origin [[O_P]] contains no loans
+
+
+// CHECK-LABEL: Function: reassign_in_if
+void reassign_in_if(bool condition) {
+ MyObj s1;
+ MyObj s2;
+ MyObj* p = &s1;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]])
+ if (condition) {
+ p = &s2;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]])
+ }
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Expire (LoanID: [[L_S2]])
+// CHECK: Expire (LoanID: [[L_S1]])
+}
+// CHECK: Dataflow results:
+
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
+
+
+// CHECK-LABEL: Function: assign_in_switch
+void assign_in_switch(int mode) {
+ MyObj s1;
+ MyObj s2;
+ MyObj s3;
+ MyObj* p = nullptr;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+ switch (mode) {
+ case 1:
+ p = &s1;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S1]])
+ break;
+ case 2:
+ p = &s2;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]])
+ break;
+ default:
+ p = &s3;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S3:[0-9]+]], OriginID: [[O_ADDR_S3:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S3]])
+ break;
+ }
+// CHECK: Block B{{[0-9]+}}:
+// CHECK-DAG: Expire (LoanID: [[L_S3]])
+// CHECK-DAG: Expire (LoanID: [[L_S2]])
+// CHECK-DAG: Expire (LoanID: [[L_S1]])
+}
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]]
+// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_ADDR_S3]] contains Loan [[L_S3]]
+
+
+// CHECK-LABEL: Function: loan_in_loop
+void loan_in_loop(bool condition) {
+ MyObj* p = nullptr;
+ // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+ while (condition) {
+ MyObj inner;
+ p = &inner;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]])
+// CHECK: Expire (LoanID: [[L_INNER]])
+ }
+}
+// CHECK: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]]
+// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]]
+
+
+// CHECK-LABEL: Function: loop_with_break
+void loop_with_break(int count) {
+ MyObj s1;
+ MyObj s2;
+ MyObj* p = &s1;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S1:[0-9]+]], OriginID: [[O_ADDR_S1:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_ADDR_S1]])
+ for (int i = 0; i < count; ++i) {
+ if (i == 5) {
+ p = &s2;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Issue (LoanID: [[L_S2:[0-9]+]], OriginID: [[O_ADDR_S2:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_S2]])
+ break;
+ }
+ }
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: Expire (LoanID: [[L_S2]])
+// CHECK: Expire (LoanID: [[L_S1]])
+}
+
+// CHECK-LABEL: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
+// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
+// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
+
+
+// CHECK-LABEL: Function: nested_scopes
+void nested_scopes() {
+ MyObj* p = nullptr;
+// CHECK: Block B{{[0-9]+}}:
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+ {
+ MyObj outer;
+ p = &outer;
+// CHECK: Issue (LoanID: [[L_OUTER:[0-9]+]], OriginID: [[O_ADDR_OUTER:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_OUTER]])
+ {
+ MyObj inner;
+ p = &inner;
+// CHECK: Issue (LoanID: [[L_INNER:[0-9]+]], OriginID: [[O_ADDR_INNER:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_INNER]])
+ }
+// CHECK: Expire (LoanID: [[L_INNER]])
+ }
+// CHECK: Expire (LoanID: [[L_OUTER]])
+}
+
+// CHECK-LABEL: Dataflow results:
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]]
+// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]]
+// CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]]
>From 44fcea1b6c0c84205ca9eec231978e33099c95f5 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena <usx at google.com>
Date: Thu, 12 Jun 2025 12:54:41 +0000
Subject: [PATCH 2/2] address comments
---
clang/lib/Analysis/LifetimeSafety.cpp | 381 ++++++++++--------
clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 +-
.../Sema/warn-lifetime-safety-dataflow.cpp | 28 +-
3 files changed, 219 insertions(+), 194 deletions(-)
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index 8dbb4dc37026c..1602b52c80ccd 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -6,159 +6,187 @@
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
+#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TimeProfiler.h"
-#include <vector>
+#include <cstdint>
namespace clang {
namespace {
-struct Point {
- const clang::CFGBlock *Block;
- /// Index into Block->Elements().
- unsigned ElementIndex;
-
- Point(const clang::CFGBlock *B = nullptr, unsigned Idx = 0)
- : Block(B), ElementIndex(Idx) {}
-
- bool operator==(const Point &Other) const {
- return Block == Other.Block && ElementIndex == Other.ElementIndex;
- }
-};
-
/// Represents the storage location being borrowed, e.g., a specific stack
/// variable.
-/// TODO: Handle member accesseslike `s.y`.
-struct Path {
+struct AccessPath {
const clang::ValueDecl *D;
enum class Kind : uint8_t {
StackVariable,
- Heap, // TODO: Handle.
- Field, // TODO: Handle.
- ArrayElement, // TODO: Handle.
- TemporaryObject, // TODO: Handle.
- StaticOrGlobal, // TODO: Handle.
+ Temporary, // TODO: Handle.
+ Field, // TODO: Handle like `s.y`.
+ Heap, // TODO: Handle.
+ ArrayElement, // TODO: Handle.
+ Static, // TODO: Handle.
};
Kind PathKind;
- Path(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {}
+ AccessPath(const clang::ValueDecl *D, Kind K) : D(D), PathKind(K) {}
+};
+
+/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type.
+/// Used for giving ID to loans and origins.
+template <typename Tag> struct ID {
+ uint32_t Value = 0;
+
+ bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; }
+ bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); }
+ bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; }
+ ID<Tag> &operator++() {
+ ++Value;
+ return *this;
+ }
+ void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
+ IDBuilder.AddInteger(Value);
+ }
};
-using LoanID = uint32_t;
-using OriginID = uint32_t;
+template <typename Tag>
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) {
+ return OS << ID.Value;
+}
+
+struct LoanTag {};
+struct OriginTag {};
+
+using LoanID = ID<LoanTag>;
+using OriginID = ID<OriginTag>;
/// Information about a single borrow, or "Loan". A loan is created when a
/// reference or pointer is taken.
-struct LoanInfo {
+struct Loan {
/// TODO: Represent opaque loans.
/// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it
/// is represented as empty LoanSet
LoanID ID;
- Path SourcePath;
+ AccessPath Path;
SourceLocation IssueLoc;
- LoanInfo(LoanID id, Path path, SourceLocation loc)
- : ID(id), SourcePath(path), IssueLoc(loc) {}
+ Loan(LoanID id, AccessPath path, SourceLocation loc)
+ : ID(id), Path(path), IssueLoc(loc) {}
};
-enum class OriginKind : uint8_t { Variable, ExpressionResult };
-
/// An Origin is a symbolic identifier that represents the set of possible
/// loans a pointer-like object could hold at any given time.
/// TODO: Also represent Origins of complex types (fields, inner types).
-struct OriginInfo {
+struct Origin {
OriginID ID;
- OriginKind Kind;
- union {
- const clang::ValueDecl *Decl;
- const clang::Expr *Expression;
- };
- OriginInfo(OriginID id, OriginKind kind, const clang::ValueDecl *D)
- : ID(id), Kind(kind), Decl(D) {}
- OriginInfo(OriginID id, OriginKind kind, const clang::Expr *E)
- : ID(id), Kind(kind), Expression(E) {}
+ llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr;
+
+ Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {}
+ Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {}
+
+ const clang::ValueDecl *getDecl() const {
+ return Ptr.dyn_cast<const clang::ValueDecl *>();
+ }
+ const clang::Expr *getExpr() const {
+ return Ptr.dyn_cast<const clang::Expr *>();
+ }
};
class LoanManager {
public:
LoanManager() = default;
- LoanInfo &addLoanInfo(Path path, SourceLocation loc) {
- NextLoanIDVal++;
- AllLoans.emplace_back(NextLoanIDVal, path, loc);
+ Loan &addLoan(AccessPath path, SourceLocation loc) {
+ ++NextLoanID;
+ AllLoans.emplace_back(NextLoanID, path, loc);
return AllLoans.back();
}
- const LoanInfo *getLoanInfo(LoanID id) const {
- if (id < AllLoans.size())
- return &AllLoans[id];
- return nullptr;
+ const Loan &getLoan(LoanID id) const {
+ assert(id.Value < AllLoans.size());
+ return AllLoans[id.Value];
}
- llvm::ArrayRef<LoanInfo> getLoanInfos() const { return AllLoans; }
+ llvm::ArrayRef<Loan> getLoans() const { return AllLoans; }
private:
- LoanID NextLoanIDVal = 0;
- llvm::SmallVector<LoanInfo> AllLoans;
+ LoanID NextLoanID{0};
+ /// TODO(opt): Profile and evaluate the usefullness of small buffer
+ /// optimisation.
+ llvm::SmallVector<Loan> AllLoans;
};
class OriginManager {
public:
OriginManager() = default;
- OriginID getNextOriginID() { return NextOriginIDVal++; }
- OriginInfo &addOriginInfo(OriginID id, const clang::ValueDecl *D) {
- assert(D != nullptr);
- AllOrigins.emplace_back(id, OriginKind::Variable, D);
+ OriginID getNextOriginID() { return ++NextOriginID; }
+ Origin &addOrigin(OriginID id, const clang::ValueDecl &D) {
+ AllOrigins.emplace_back(id, &D);
return AllOrigins.back();
}
- OriginInfo &addOriginInfo(OriginID id, const clang::Expr *E) {
- assert(E != nullptr);
- AllOrigins.emplace_back(id, OriginKind::ExpressionResult, E);
+ Origin &addOrigin(OriginID id, const clang::Expr &E) {
+ AllOrigins.emplace_back(id, &E);
return AllOrigins.back();
}
- OriginID getOrCreate(const Expr *E) {
- auto It = ExprToOriginID.find(E);
+ OriginID get(const Expr &E) {
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
+ // Origin of DeclRefExpr is that of the declaration it refers to.
+ return get(*DRE->getDecl());
+ }
+ auto It = ExprToOriginID.find(&E);
+ assert(It != ExprToOriginID.end());
+ return It->second;
+ }
+
+ OriginID get(const ValueDecl &D) {
+ auto It = DeclToOriginID.find(&D);
+ assert(It != DeclToOriginID.end());
+ return It->second;
+ }
+
+ OriginID getOrCreate(const Expr &E) {
+ auto It = ExprToOriginID.find(&E);
if (It != ExprToOriginID.end())
return It->second;
- if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
// Origin of DeclRefExpr is that of the declaration it refers to.
- return getOrCreate(DRE->getDecl());
+ return getOrCreate(*DRE->getDecl());
}
OriginID NewID = getNextOriginID();
- addOriginInfo(NewID, E);
- ExprToOriginID[E] = NewID;
+ addOrigin(NewID, E);
+ ExprToOriginID[&E] = NewID;
return NewID;
}
- const OriginInfo *getOriginInfo(OriginID id) const {
- if (id < AllOrigins.size())
- return &AllOrigins[id];
- return nullptr;
+ const Origin &getOrigin(OriginID ID) const {
+ assert(ID.Value < AllOrigins.size());
+ return AllOrigins[ID.Value];
}
- llvm::ArrayRef<OriginInfo> getOriginInfos() const { return AllOrigins; }
+ llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; }
- OriginID getOrCreate(const ValueDecl *D) {
- auto It = DeclToOriginID.find(D);
+ OriginID getOrCreate(const ValueDecl &D) {
+ auto It = DeclToOriginID.find(&D);
if (It != DeclToOriginID.end())
return It->second;
OriginID NewID = getNextOriginID();
- addOriginInfo(NewID, D);
- DeclToOriginID[D] = NewID;
+ addOrigin(NewID, D);
+ DeclToOriginID[&D] = NewID;
return NewID;
}
private:
- OriginID NextOriginIDVal = 0;
- llvm::SmallVector<OriginInfo> AllOrigins;
+ OriginID NextOriginID{0};
+ /// TODO(opt): Profile and evaluate the usefullness of small buffer
+ /// optimisation.
+ llvm::SmallVector<Origin> AllOrigins;
llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID;
llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID;
};
@@ -183,13 +211,11 @@ class Fact {
Kind K;
protected:
- Point P;
- Fact(Kind K, Point Pt) : K(K), P(Pt) {}
+ Fact(Kind K) : K(K) {}
public:
virtual ~Fact() = default;
Kind getKind() const { return K; }
- Point getPoint() const { return P; }
template <typename T> const T *getAs() const {
if (T::classof(this))
@@ -198,8 +224,7 @@ class Fact {
}
virtual void dump(llvm::raw_ostream &OS) const {
- OS << "Fact (Kind: " << static_cast<int>(K) << ", Point: B"
- << P.Block->getBlockID() << ":" << P.ElementIndex << ")\n";
+ OS << "Fact (Kind: " << static_cast<int>(K) << ")\n";
}
};
@@ -208,11 +233,11 @@ class IssueFact : public Fact {
OriginID OID;
public:
- IssueFact(LoanID LID, OriginID OID, Point Pt)
- : Fact(Kind::Issue, Pt), LID(LID), OID(OID) {}
+ static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
+
+ IssueFact(LoanID LID, OriginID OID) : Fact(Kind::Issue), LID(LID), OID(OID) {}
LoanID getLoanID() const { return LID; }
OriginID getOriginID() const { return OID; }
- static bool classof(const Fact *F) { return F->getKind() == Kind::Issue; }
void dump(llvm::raw_ostream &OS) const override {
OS << "Issue (LoanID: " << getLoanID() << ", OriginID: " << getOriginID()
<< ")\n";
@@ -223,9 +248,10 @@ class ExpireFact : public Fact {
LoanID LID;
public:
- ExpireFact(LoanID LID, Point Pt) : Fact(Kind::Expire, Pt), LID(LID) {}
- LoanID getLoanID() const { return LID; }
static bool classof(const Fact *F) { return F->getKind() == Kind::Expire; }
+
+ ExpireFact(LoanID LID) : Fact(Kind::Expire), LID(LID) {}
+ LoanID getLoanID() const { return LID; }
void dump(llvm::raw_ostream &OS) const override {
OS << "Expire (LoanID: " << getLoanID() << ")\n";
}
@@ -236,13 +262,14 @@ class AssignOriginFact : public Fact {
OriginID OIDSrc;
public:
- AssignOriginFact(OriginID OIDDest, OriginID OIDSrc, Point Pt)
- : Fact(Kind::AssignOrigin, Pt), OIDDest(OIDDest), OIDSrc(OIDSrc) {}
- OriginID getDestOriginID() const { return OIDDest; }
- OriginID getSrcOriginID() const { return OIDSrc; }
static bool classof(const Fact *F) {
return F->getKind() == Kind::AssignOrigin;
}
+
+ AssignOriginFact(OriginID OIDDest, OriginID OIDSrc)
+ : Fact(Kind::AssignOrigin), OIDDest(OIDDest), OIDSrc(OIDSrc) {}
+ OriginID getDestOriginID() const { return OIDDest; }
+ OriginID getSrcOriginID() const { return OIDSrc; }
void dump(llvm::raw_ostream &OS) const override {
OS << "AssignOrigin (DestID: " << getDestOriginID()
<< ", SrcID: " << getSrcOriginID() << ")\n";
@@ -253,12 +280,12 @@ class ReturnOfOriginFact : public Fact {
OriginID OID;
public:
- ReturnOfOriginFact(OriginID OID, Point Pt)
- : Fact(Kind::ReturnOfOrigin, Pt), OID(OID) {}
- OriginID getReturnedOriginID() const { return OID; }
static bool classof(const Fact *F) {
return F->getKind() == Kind::ReturnOfOrigin;
}
+
+ ReturnOfOriginFact(OriginID OID) : Fact(Kind::ReturnOfOrigin), OID(OID) {}
+ OriginID getReturnedOriginID() const { return OID; }
void dump(llvm::raw_ostream &OS) const override {
OS << "ReturnOfOrigin (OriginID: " << getReturnedOriginID() << ")\n";
}
@@ -266,15 +293,17 @@ class ReturnOfOriginFact : public Fact {
class FactManager {
public:
- llvm::ArrayRef<Fact *> getFacts(const CFGBlock *B) const {
+ llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
auto It = BlockToFactsMap.find(B);
if (It != BlockToFactsMap.end())
return It->second;
return {};
}
- void addFact(const CFGBlock *B, Fact *NewFact) {
- BlockToFactsMap[B].push_back(NewFact);
+ void addBlockFacts(const CFGBlock *B, llvm::ArrayRef<Fact *> NewFacts) {
+ if (!NewFacts.empty()) {
+ BlockToFactsMap[B].assign(NewFacts.begin(), NewFacts.end());
+ }
}
template <typename FactType, typename... Args>
@@ -308,75 +337,70 @@ class FactManager {
}
}
+ LoanManager &getLoanMgr() { return LoanMgr; }
+ OriginManager &getOriginMgr() { return OriginMgr; }
+
private:
- llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<Fact *>>
+ LoanManager LoanMgr;
+ OriginManager OriginMgr;
+ llvm::DenseMap<const clang::CFGBlock *, llvm::SmallVector<const Fact *>>
BlockToFactsMap;
llvm::BumpPtrAllocator FactAllocator;
};
-struct FactsContext {
- FactManager Facts;
- LoanManager Loans;
- OriginManager Origins;
-};
-
class FactGenerator : public ConstStmtVisitor<FactGenerator> {
public:
- FactGenerator(const CFG &Cfg, FactsContext &FactsCtx, AnalysisDeclContext &AC)
- : FactsCtx(FactsCtx), Cfg(Cfg), AC(AC), CurrentBlock(nullptr) {}
+ FactGenerator(const CFG &Cfg, FactManager &FactMgr, AnalysisDeclContext &AC)
+ : FactMgr(FactMgr), Cfg(Cfg), AC(AC) {}
void run() {
- llvm::TimeTraceScope TimeProfile("Fact Generation");
- // Iterate through the CFG blocks in pre-order to ensure that
+ llvm::TimeTraceScope TimeProfile("FactGenerator");
+ // Iterate through the CFG blocks in reverse post-order to ensure that
// initializations and destructions are processed in the correct sequence.
- // TODO: A pre-order traversal utility should be provided by Dataflow
- // framework.
+ // TODO: A reverse post-order traversal utility should be provided by
+ // Dataflow framework.
ForwardDataflowWorklist Worklist(Cfg, AC);
for (const CFGBlock *B : Cfg.const_nodes())
Worklist.enqueueBlock(B);
while (const CFGBlock *Block = Worklist.dequeue()) {
- CurrentBlock = Block;
+ CurrentBlockFacts.clear();
for (unsigned I = 0; I < Block->size(); ++I) {
const CFGElement &Element = Block->Elements[I];
- CurrentPoint = Point(Block, I);
if (std::optional<CFGStmt> CS = Element.getAs<CFGStmt>())
Visit(CS->getStmt());
else if (std::optional<CFGAutomaticObjDtor> DtorOpt =
Element.getAs<CFGAutomaticObjDtor>())
handleDestructor(*DtorOpt);
}
+ FactMgr.addBlockFacts(Block, CurrentBlockFacts);
}
}
void VisitDeclStmt(const DeclStmt *DS) {
- for (const Decl *D : DS->decls()) {
- if (const auto *VD = dyn_cast<VarDecl>(D)) {
- if (hasOrigin(VD->getType())) {
- if (const Expr *InitExpr = VD->getInit()) {
- OriginID DestOID = FactsCtx.Origins.getOrCreate(VD);
- OriginID SrcOID = FactsCtx.Origins.getOrCreate(InitExpr);
- FactsCtx.Facts.addFact(CurrentBlock,
- FactsCtx.Facts.createFact<AssignOriginFact>(
- DestOID, SrcOID, CurrentPoint));
- }
- }
- }
- }
+ for (const Decl *D : DS->decls())
+ if (const auto *VD = dyn_cast<VarDecl>(D))
+ if (hasOrigin(VD->getType()))
+ if (const Expr *InitExpr = VD->getInit())
+ addAssignOriginFact(*VD, *InitExpr);
+ }
+
+ void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) {
+ /// TODO: Handle nullptr expr as a special 'null' loan. Uninintialed
+ /// pointers can use the same type of loan.
+ FactMgr.getOriginMgr().getOrCreate(*N);
}
void VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
if (!hasOrigin(ICE->getType()))
return;
+ Visit(ICE->getSubExpr());
+ /// TODO: Consider if this is actually useful in practice. Alternatively, we
+ /// could directly use the sub-expression's OriginID instead of creating a
+ /// new one.
// An ImplicitCastExpr node itself gets an origin, which flows from the
// origin of its sub-expression (after stripping its own parens/casts).
- if (ICE->getCastKind() == CK_LValueToRValue) {
- OriginID IceOID = FactsCtx.Origins.getOrCreate(ICE);
- OriginID SubExprOID = FactsCtx.Origins.getOrCreate(ICE->getSubExpr());
- FactsCtx.Facts.addFact(CurrentBlock,
- FactsCtx.Facts.createFact<AssignOriginFact>(
- IceOID, SubExprOID, CurrentPoint));
- }
+ addAssignOriginFact(*ICE, *ICE->getSubExpr());
}
void VisitUnaryOperator(const UnaryOperator *UO) {
@@ -386,13 +410,12 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
// Check if it's a local variable.
if (VD->hasLocalStorage()) {
- OriginID OID = FactsCtx.Origins.getOrCreate(UO);
- Path AddrOfLocalVarPath(VD, Path::Kind::StackVariable);
- LoanInfo &Loan = FactsCtx.Loans.addLoanInfo(AddrOfLocalVarPath,
- UO->getOperatorLoc());
- FactsCtx.Facts.addFact(CurrentBlock,
- FactsCtx.Facts.createFact<IssueFact>(
- Loan.ID, OID, CurrentPoint));
+ OriginID OID = FactMgr.getOriginMgr().getOrCreate(*UO);
+ AccessPath AddrOfLocalVarPath(VD, AccessPath::Kind::StackVariable);
+ Loan &L = FactMgr.getLoanMgr().addLoan(AddrOfLocalVarPath,
+ UO->getOperatorLoc());
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<IssueFact>(L.ID, OID));
}
}
}
@@ -402,10 +425,9 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
void VisitReturnStmt(const ReturnStmt *RS) {
if (const Expr *RetExpr = RS->getRetValue()) {
if (hasOrigin(RetExpr->getType())) {
- OriginID OID = FactsCtx.Origins.getOrCreate(RetExpr);
- FactsCtx.Facts.addFact(
- CurrentBlock,
- FactsCtx.Facts.createFact<ReturnOfOriginFact>(OID, CurrentPoint));
+ OriginID OID = FactMgr.getOriginMgr().getOrCreate(*RetExpr);
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<ReturnOfOriginFact>(OID));
}
}
}
@@ -419,17 +441,11 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
// LHS must be a pointer/reference type that can be an origin.
// RHS must also represent an origin (either another pointer/ref or an
// address-of).
- if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) {
+ if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr))
if (const auto *VD_LHS =
dyn_cast<ValueDecl>(DRE_LHS->getDecl()->getCanonicalDecl());
- VD_LHS && hasOrigin(VD_LHS->getType())) {
- OriginID DestOID = FactsCtx.Origins.getOrCreate(VD_LHS);
- OriginID SrcOID = FactsCtx.Origins.getOrCreate(RHSExpr);
- FactsCtx.Facts.addFact(CurrentBlock,
- FactsCtx.Facts.createFact<AssignOriginFact>(
- DestOID, SrcOID, CurrentPoint));
- }
- }
+ VD_LHS && hasOrigin(VD_LHS->getType()))
+ addAssignOriginFact(*VD_LHS, *RHSExpr);
}
}
@@ -437,33 +453,40 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
// Check if a type have an origin.
bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); }
+ template <typename Destination, typename Source>
+ void addAssignOriginFact(const Destination &D, const Source &S) {
+ OriginID DestOID = FactMgr.getOriginMgr().getOrCreate(D);
+ OriginID SrcOID = FactMgr.getOriginMgr().get(S);
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<AssignOriginFact>(DestOID, SrcOID));
+ }
+
void handleDestructor(const CFGAutomaticObjDtor &DtorOpt) {
/// TODO: Also handle trivial destructors (e.g., for `int`
- // variables) which will never have a CFGAutomaticObjDtor node.
+ /// variables) which will never have a CFGAutomaticObjDtor node.
/// TODO: Handle loans to temporaries.
const VarDecl *DestructedVD = DtorOpt.getVarDecl();
if (!DestructedVD)
return;
// Iterate through all loans to see if any expire.
- for (const LoanInfo &Loan : FactsCtx.Loans.getLoanInfos()) {
- const Path &LoanPath = Loan.SourcePath;
+ /// TODO(opt): Do better than a linear search to find loans associated with
+ /// 'DestructedVD'.
+ for (const Loan &L : FactMgr.getLoanMgr().getLoans()) {
+ const AccessPath &LoanPath = L.Path;
// Check if the loan is for a stack variable and if that variable
// is the one being destructed.
- if (LoanPath.PathKind == Path::Kind::StackVariable) {
+ if (LoanPath.PathKind == AccessPath::Kind::StackVariable) {
if (LoanPath.D == DestructedVD) {
- FactsCtx.Facts.addFact(
- CurrentBlock,
- FactsCtx.Facts.createFact<ExpireFact>(Loan.ID, CurrentPoint));
+ CurrentBlockFacts.push_back(FactMgr.createFact<ExpireFact>(L.ID));
}
}
}
}
- FactsContext &FactsCtx;
+ FactManager &FactMgr;
const CFG &Cfg;
AnalysisDeclContext &AC;
- const CFGBlock *CurrentBlock;
- Point CurrentPoint;
+ llvm::SmallVector<Fact *> CurrentBlockFacts;
};
// ========================================================================= //
@@ -476,7 +499,7 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
using LoanSet = llvm::ImmutableSet<LoanID>;
using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
-/// A context object to hold the factories for immutable collections, ensuring
+/// An object to hold the factories for immutable collections, ensuring
/// that all created states share the same underlying memory management.
struct LifetimeFactory {
OriginLoanMap::Factory OriginMapFact;
@@ -512,28 +535,28 @@ struct LifetimeLattice {
return Factory.LoanSetFact.getEmptySet();
}
- /// Computes the union of two lattices by performing a key-wise merge of
+ /// Computes the union of two lattices by performing a key-wise join of
/// their OriginLoanMaps.
- LifetimeLattice merge(const LifetimeLattice &Other,
- LifetimeFactory &Factory) const {
+ LifetimeLattice join(const LifetimeLattice &Other,
+ LifetimeFactory &Factory) const {
/// Merge the smaller map into the larger one ensuring we iterate over the
/// smaller map.
if (Origins.getHeight() < Other.Origins.getHeight())
- return Other.merge(*this, Factory);
+ return Other.join(*this, Factory);
- OriginLoanMap MergedState = Origins;
+ OriginLoanMap JoinedState = Origins;
// For each origin in the other map, union its loan set with ours.
for (const auto &Entry : Other.Origins) {
OriginID OID = Entry.first;
LoanSet OtherLoanSet = Entry.second;
- MergedState = Factory.OriginMapFact.add(
- MergedState, OID,
- merge(getLoans(OID, Factory), OtherLoanSet, Factory));
+ JoinedState = Factory.OriginMapFact.add(
+ JoinedState, OID,
+ join(getLoans(OID, Factory), OtherLoanSet, Factory));
}
- return LifetimeLattice(MergedState);
+ return LifetimeLattice(JoinedState);
}
- LoanSet merge(LoanSet a, LoanSet b, LifetimeFactory &Factory) const {
+ LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const {
/// Merge the smaller set into the larger one ensuring we iterate over the
/// smaller set.
if (a.getHeight() < b.getHeight())
@@ -542,7 +565,7 @@ struct LifetimeLattice {
for (LoanID LID : b) {
/// TODO(opt): Profiling shows that this loop is a major performance
/// bottleneck. Investigate using a BitVector to represent the set of
- /// loans for improved merge performance.
+ /// loans for improved join performance.
Result = Factory.LoanSetFact.add(Result, LID);
}
return Result;
@@ -579,7 +602,7 @@ class Transferer {
LifetimeLattice transferBlock(const CFGBlock *Block,
LifetimeLattice EntryState) {
LifetimeLattice BlockState = EntryState;
- llvm::ArrayRef<Fact *> Facts = AllFacts.getFacts(Block);
+ llvm::ArrayRef<const Fact *> Facts = AllFacts.getFacts(Block);
for (const Fact *F : Facts) {
BlockState = transferFact(BlockState, F);
@@ -664,7 +687,7 @@ class LifetimeDataflow {
? SuccIt->second
: LifetimeLattice{};
LifetimeLattice NewSuccEntryState =
- OldSuccEntryState.merge(ExitState, LifetimeFact);
+ OldSuccEntryState.join(ExitState, LifetimeFact);
if (SuccIt == BlockEntryStates.end() ||
NewSuccEntryState != OldSuccEntryState) {
BlockEntryStates[Successor] = NewSuccEntryState;
@@ -707,10 +730,12 @@ class LifetimeDataflow {
void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
AnalysisDeclContext &AC) {
llvm::TimeTraceScope TimeProfile("Lifetime Analysis");
- FactsContext FactsCtx;
- FactGenerator FactGen(Cfg, FactsCtx, AC);
+ DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(),
+ /*ShowColors=*/true));
+ FactManager FactMgr;
+ FactGenerator FactGen(Cfg, FactMgr, AC);
FactGen.run();
- DEBUG_WITH_TYPE("LifetimeFacts", FactsCtx.Facts.dump(Cfg, AC));
+ DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC));
/// TODO(opt): Consider optimizing individual blocks before running the
/// dataflow analysis.
@@ -721,8 +746,8 @@ void runLifetimeAnalysis(const DeclContext &DC, const CFG &Cfg,
/// blocks; only Decls are visible. Therefore, loans in a block that
/// never reach an Origin associated with a Decl can be safely dropped by
/// the analysis.
- LifetimeDataflow Dataflow(Cfg, FactsCtx.Facts, AC);
+ LifetimeDataflow Dataflow(Cfg, FactMgr, AC);
Dataflow.run();
DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump());
}
-} // namespace clang
\ No newline at end of file
+} // namespace clang
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index dd4bede775a40..a6e82e9572df4 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2845,7 +2845,9 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
}
DEBUG_WITH_TYPE(
- "ExperimentalLifetimeAnalysis", if (S.getLangOpts().CPlusPlus) {
+ "ExperimentalLifetimeAnalysis",
+ // TODO: Enable for other languages once the analysis is stable.
+ if (S.getLangOpts().CPlusPlus) {
if (CFG *cfg = AC.getCFG()) {
runLifetimeAnalysis(*cast<DeclContext>(D), *cfg, AC);
}
diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
index 466bb7d9c3cd9..2d732e56c0872 100644
--- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -mllvm -debug-only=ExperimentalLifetimeAnalysis,LifetimeFacts,LifetimeDataflow -Wreturn-stack-address-cfg %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -mllvm -debug-only=ExperimentalLifetimeAnalysis,LifetimeFacts,LifetimeDataflow %s 2>&1 | FileCheck %s
struct MyObj {
int id;
@@ -19,7 +19,6 @@ MyObj* return_local_addr() {
// CHECK: Expire (LoanID: [[L_X]])
}
// CHECK: Dataflow results:
-
// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]]
// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]]
// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]]
@@ -48,7 +47,6 @@ MyObj* assign_and_return_local_addr() {
// CHECK: Expire (LoanID: [[L_Y]])
}
// CHECK: Dataflow results:
-
// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]]
// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]]
// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]]
@@ -67,7 +65,6 @@ int return_int_val() {
}
// CHECK-NEXT: End of Block
// CHECK: Dataflow results:
-
// CHECK: <empty>
@@ -82,7 +79,6 @@ void loan_expires_cpp() {
// CHECK: Expire (LoanID: [[L_OBJ]])
}
// CHECK: Dataflow results:
-
// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]]
// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]]
@@ -100,7 +96,6 @@ void loan_expires_trivial() {
// FIXME: Add check for Expire once trivial destructors are handled for expiration.
}
// CHECK: Dataflow results:
-
// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]]
// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]]
@@ -119,13 +114,17 @@ void conditional(bool condition) {
p = &b;
// CHECK: Issue (LoanID: [[L_B:[0-9]+]], OriginID: [[O_ADDR_B:[0-9]+]])
// CHECK: AssignOrigin (DestID: [[O_P]], SrcID: [[O_ADDR_B]])
+ int *q = p;
+ // CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]])
+ // CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]])
}
// CHECK: Dataflow results:
-
// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]]
// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]]
+// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]]
// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]]
+// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_A]]
+// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_B]]
// CHECK-LABEL: Function: pointers_in_a_cycle
@@ -164,7 +163,6 @@ void pointers_in_a_cycle(bool condition) {
// At the end of the analysis, the origins for the pointers involved in the cycle
// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point.
// CHECK: Dataflow results:
-
// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]]
// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]]
// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]]
@@ -197,7 +195,6 @@ void overwrite_origin() {
// CHECK: Expire (LoanID: [[L_S1]])
}
// CHECK: Dataflow results:
-
// CHECK: Origin [[O_P]] contains Loan [[L_S2]]
// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]]
@@ -216,7 +213,6 @@ void reassign_to_null() {
// FIXME: Have a better representation for nullptr than just an empty origin.
// It should be a separate loan and origin kind.
// CHECK: Dataflow results:
-
// CHECK: Origin [[O_P]] contains no loans
@@ -239,7 +235,6 @@ void reassign_in_if(bool condition) {
// CHECK: Expire (LoanID: [[L_S1]])
}
// CHECK: Dataflow results:
-
// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
@@ -253,7 +248,8 @@ void assign_in_switch(int mode) {
MyObj s3;
MyObj* p = nullptr;
// CHECK: Block B{{[0-9]+}}:
-// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]])
switch (mode) {
case 1:
p = &s1;
@@ -291,7 +287,8 @@ void assign_in_switch(int mode) {
// CHECK-LABEL: Function: loan_in_loop
void loan_in_loop(bool condition) {
MyObj* p = nullptr;
- // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+ // CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+ // CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]])
while (condition) {
MyObj inner;
p = &inner;
@@ -339,7 +336,8 @@ void loop_with_break(int count) {
void nested_scopes() {
MyObj* p = nullptr;
// CHECK: Block B{{[0-9]+}}:
-// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_NULLPTR_CAST:[0-9]+]], SrcID: [[O_NULLPTR:[0-9]+]])
+// CHECK: AssignOrigin (DestID: [[O_P:[0-9]+]], SrcID: [[O_NULLPTR_CAST]])
{
MyObj outer;
p = &outer;
More information about the cfe-commits
mailing list