[clang] [LifetimeSafety] Introduce intra-procedural analysis in Clang (PR #142313)
Yitzhak Mandelbaum via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 10 08:16:44 PDT 2025
================
@@ -0,0 +1,504 @@
+//===- LifetimeSafety.cpp - C++ Lifetime Safety Analysis -*--------- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Analysis/Analyses/LifetimeSafety.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
+#include "clang/Analysis/Analyses/PostOrderCFGView.h"
+#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Analysis/CFG.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include <cstdint>
+
+namespace clang {
+namespace {
+
+/// Represents the storage location being borrowed, e.g., a specific stack
+/// variable.
+/// TODO: Model access paths of other types, e.g., s.field, heap and globals.
+struct AccessPath {
+ const clang::ValueDecl *D;
+
+ AccessPath(const clang::ValueDecl *D) : D(D) {}
+};
+
+/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type.
+/// Used for giving ID to loans and origins.
+template <typename Tag> struct ID {
+ uint32_t Value = 0;
+
+ bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; }
+ bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); }
+ bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; }
+ ID<Tag> operator++(int) {
+ ID<Tag> Tmp = *this;
+ ++Value;
+ return Tmp;
+ }
+ void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
+ IDBuilder.AddInteger(Value);
+ }
+};
+
+template <typename Tag>
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) {
+ return OS << ID.Value;
+}
+
+using LoanID = ID<struct LoanTag>;
+using OriginID = ID<struct OriginTag>;
+
+/// Information about a single borrow, or "Loan". A loan is created when a
+/// reference or pointer is taken.
+struct Loan {
+ /// TODO: Represent opaque loans.
+ /// TODO: Represent nullptr: loans to no path. Accessing it UB! Currently it
+ /// is represented as empty LoanSet
+ LoanID ID;
+ AccessPath Path;
+ SourceLocation IssueLoc;
+
+ Loan(LoanID id, AccessPath path, SourceLocation loc)
+ : ID(id), Path(path), IssueLoc(loc) {}
+};
+
+/// An Origin is a symbolic identifier that represents the set of possible
+/// loans a pointer-like object could hold at any given time.
+/// TODO: Enhance the origin model to handle complex types, pointer
+/// indirection and reborrowing. The plan is to move from a single origin per
+/// variable/expression to a "list of origins" governed by the Type.
+/// For example, the type 'int**' would have two origins.
+/// See discussion:
+/// https://github.com/llvm/llvm-project/pull/142313/commits/0cd187b01e61b200d92ca0b640789c1586075142#r2137644238
+struct Origin {
+ OriginID ID;
+ llvm::PointerUnion<const clang::ValueDecl *, const clang::Expr *> Ptr;
+
+ Origin(OriginID ID, const clang::ValueDecl *D) : ID(ID), Ptr(D) {}
+ Origin(OriginID ID, const clang::Expr *E) : ID(ID), Ptr(E) {}
+
+ const clang::ValueDecl *getDecl() const {
+ return Ptr.dyn_cast<const clang::ValueDecl *>();
+ }
+ const clang::Expr *getExpr() const {
+ return Ptr.dyn_cast<const clang::Expr *>();
+ }
+};
+
+class LoanManager {
+public:
+ LoanManager() = default;
+
+ Loan &addLoan(AccessPath Path, SourceLocation Loc) {
+ AllLoans.emplace_back(getNextLoanID(), Path, Loc);
+ return AllLoans.back();
+ }
+
+ const Loan &getLoan(LoanID ID) const {
+ assert(ID.Value < AllLoans.size());
+ return AllLoans[ID.Value];
+ }
+ llvm::ArrayRef<Loan> getLoans() const { return AllLoans; }
+
+private:
+ LoanID getNextLoanID() { return NextLoanID++; }
+
+ LoanID NextLoanID{0};
+ /// TODO(opt): Profile and evaluate the usefullness of small buffer
+ /// optimisation.
+ llvm::SmallVector<Loan> AllLoans;
+};
+
+class OriginManager {
+public:
+ OriginManager() = default;
+
+ Origin &addOrigin(OriginID ID, const clang::ValueDecl &D) {
+ AllOrigins.emplace_back(ID, &D);
+ return AllOrigins.back();
+ }
+ Origin &addOrigin(OriginID ID, const clang::Expr &E) {
+ AllOrigins.emplace_back(ID, &E);
+ return AllOrigins.back();
+ }
+
+ OriginID get(const Expr &E) {
+ // Origin of DeclRefExpr is that of the declaration it refers to.
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(&E))
+ return get(*DRE->getDecl());
+ auto It = ExprToOriginID.find(&E);
+ // TODO: This should be an assert(It != ExprToOriginID.end()). The current
+ // implementation falls back to getOrCreate to avoid crashing on
+ // yet-unhandled pointer expressions, creating an empty origin for them.
+ if (It == ExprToOriginID.end())
+ return getOrCreate(E);
+
+ return It->second;
+ }
+
+ OriginID get(const ValueDecl &D) {
+ auto It = DeclToOriginID.find(&D);
+ // TODO: This should be an assert(It != DeclToOriginID.end()). The current
+ // implementation falls back to getOrCreate to avoid crashing on
+ // yet-unhandled pointer expressions, creating an empty origin for them.
+ if (It == DeclToOriginID.end())
+ return getOrCreate(D);
+
+ return It->second;
+ }
+
+ OriginID getOrCreate(const Expr &E) {
+ auto It = ExprToOriginID.find(&E);
+ if (It != ExprToOriginID.end())
+ return It->second;
+
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(&E)) {
+ // Origin of DeclRefExpr is that of the declaration it refers to.
+ return getOrCreate(*DRE->getDecl());
+ }
+ OriginID NewID = getNextOriginID();
+ addOrigin(NewID, E);
+ ExprToOriginID[&E] = NewID;
+ return NewID;
+ }
+
+ const Origin &getOrigin(OriginID ID) const {
+ assert(ID.Value < AllOrigins.size());
+ return AllOrigins[ID.Value];
+ }
+
+ llvm::ArrayRef<Origin> getOrigins() const { return AllOrigins; }
+
+ OriginID getOrCreate(const ValueDecl &D) {
+ auto It = DeclToOriginID.find(&D);
+ if (It != DeclToOriginID.end())
+ return It->second;
+ OriginID NewID = getNextOriginID();
+ addOrigin(NewID, D);
+ DeclToOriginID[&D] = NewID;
+ return NewID;
+ }
+
+private:
+ OriginID getNextOriginID() { return NextOriginID++; }
+
+ OriginID NextOriginID{0};
+ /// TODO(opt): Profile and evaluate the usefullness of small buffer
+ /// optimisation.
+ llvm::SmallVector<Origin> AllOrigins;
+ llvm::DenseMap<const clang::ValueDecl *, OriginID> DeclToOriginID;
+ llvm::DenseMap<const clang::Expr *, OriginID> ExprToOriginID;
+};
+
+/// An abstract base class for a single, atomic lifetime-relevant event.
+class Fact {
+
+public:
+ enum class Kind : uint8_t {
+ /// A new loan is issued from a borrow expression (e.g., &x).
+ Issue,
+ /// A loan expires as its underlying storage is freed (e.g., variable goes
+ /// out of scope).
+ Expire,
+ /// An origin is propagated from a source to a destination (e.g., p = q).
+ AssignOrigin,
+ /// An origin is part of a function's return value.
----------------
ymand wrote:
Can you clarify the language here? The other three all describe "events", but this one seems to state a fact.
https://github.com/llvm/llvm-project/pull/142313
More information about the cfe-commits
mailing list