[clang] [Clang] Extend lifetime bound analysis to support assignments (PR #96475)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 24 03:43:38 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Haojian Wu (hokein)
<details>
<summary>Changes</summary>
The lifetime bound warning in Clang currently only considers initializations. This patch extends the warning to include assignments.
- **NFC refactoring (first commit)**: this moves the existing lifetime checking code from `SemaInit.cpp` to a new location for better code isolation and reuse.
- **Support for assignments of built-in pointer types (second commit)**: this is done is by reusing the existing statement-local implementation. Clang now warns if the pointer is assigned to a temporary object that being destoryed at the end of the full assignment expression.
With this patch, we will detect more cases under the on-by-default diagnostic `-Wdangling`. I have added a new category for this specific diagnostic so that people can temporarily disable it if their codebase is not yet clean.
This is the first step to address #<!-- -->63310, focusing only on pointer types. Support for C++ assignment operators will come in a follow-up patch.
---
Patch is 109.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96475.diff
10 Files Affected:
- (modified) clang/include/clang/Basic/DiagnosticGroups.td (+3-1)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+5)
- (modified) clang/lib/Sema/CMakeLists.txt (+1)
- (added) clang/lib/Sema/CheckExprLifetime.cpp (+1285)
- (added) clang/lib/Sema/CheckExprLifetime.h (+39)
- (modified) clang/lib/Sema/SemaExpr.cpp (+4)
- (modified) clang/lib/Sema/SemaInit.cpp (+3-1229)
- (modified) clang/test/Parser/compound_literal.c (+3-2)
- (modified) clang/test/SemaCXX/attr-lifetimebound.cpp (+6)
- (modified) clang/test/SemaCXX/warn-dangling-local.cpp (+2)
``````````diff
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 9b37d4bd3205b..e828d0c459651 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -430,6 +430,7 @@ def LogicalOpParentheses: DiagGroup<"logical-op-parentheses">;
def LogicalNotParentheses: DiagGroup<"logical-not-parentheses">;
def ShiftOpParentheses: DiagGroup<"shift-op-parentheses">;
def OverloadedShiftOpParentheses: DiagGroup<"overloaded-shift-op-parentheses">;
+def DanglingAssignment: DiagGroup<"dangling-assignment">;
def DanglingElse: DiagGroup<"dangling-else">;
def DanglingField : DiagGroup<"dangling-field">;
def DanglingInitializerList : DiagGroup<"dangling-initializer-list">;
@@ -437,7 +438,8 @@ def DanglingGsl : DiagGroup<"dangling-gsl">;
def ReturnStackAddress : DiagGroup<"return-stack-address">;
// Name of this warning in GCC
def : DiagGroup<"return-local-addr", [ReturnStackAddress]>;
-def Dangling : DiagGroup<"dangling", [DanglingField,
+def Dangling : DiagGroup<"dangling", [DanglingAssignment,
+ DanglingField,
DanglingInitializerList,
DanglingGsl,
ReturnStackAddress]>;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 25a87078a5709..207529660b37b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10092,6 +10092,11 @@ def warn_new_dangling_initializer_list : Warning<
"the allocated initializer list}0 "
"will be destroyed at the end of the full-expression">,
InGroup<DanglingInitializerList>;
+def warn_dangling_pointer_assignment : Warning<
+ "object backing the pointer %0 "
+ "will be destroyed at the end of the full-expression">,
+ InGroup<DanglingAssignment>;
+
def warn_unsupported_lifetime_extension : Warning<
"lifetime extension of "
"%select{temporary|backing array of initializer list}0 created "
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index f152d243d39a5..980a83d4431aa 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -15,6 +15,7 @@ clang_tablegen(OpenCLBuiltins.inc -gen-clang-opencl-builtins
add_clang_library(clangSema
AnalysisBasedWarnings.cpp
+ CheckExprLifetime.cpp
CodeCompleteConsumer.cpp
DeclSpec.cpp
DelayedDiagnostic.cpp
diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
new file mode 100644
index 0000000000000..73b3fd2d3a138
--- /dev/null
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -0,0 +1,1285 @@
+//===--- CheckExprLifetime.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CheckExprLifetime.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/DiagnosticSema.h"
+#include "clang/Sema/Initialization.h"
+#include "clang/Sema/Sema.h"
+#include "llvm/ADT/PointerIntPair.h"
+
+namespace clang::sema {
+namespace {
+enum LifetimeKind {
+ /// The lifetime of a temporary bound to this entity ends at the end of the
+ /// full-expression, and that's (probably) fine.
+ LK_FullExpression,
+
+ /// The lifetime of a temporary bound to this entity is extended to the
+ /// lifeitme of the entity itself.
+ LK_Extended,
+
+ /// The lifetime of a temporary bound to this entity probably ends too soon,
+ /// because the entity is allocated in a new-expression.
+ LK_New,
+
+ /// The lifetime of a temporary bound to this entity ends too soon, because
+ /// the entity is a return object.
+ LK_Return,
+
+ /// The lifetime of a temporary bound to this entity ends too soon, because
+ /// the entity is the result of a statement expression.
+ LK_StmtExprResult,
+
+ /// This is a mem-initializer: if it would extend a temporary (other than via
+ /// a default member initializer), the program is ill-formed.
+ LK_MemInitializer,
+};
+using LifetimeResult =
+ llvm::PointerIntPair<const InitializedEntity *, 3, LifetimeKind>;
+}
+
+/// Determine the declaration which an initialized entity ultimately refers to,
+/// for the purpose of lifetime-extending a temporary bound to a reference in
+/// the initialization of \p Entity.
+static LifetimeResult getEntityLifetime(
+ const InitializedEntity *Entity,
+ const InitializedEntity *InitField = nullptr) {
+ // C++11 [class.temporary]p5:
+ switch (Entity->getKind()) {
+ case InitializedEntity::EK_Variable:
+ // The temporary [...] persists for the lifetime of the reference
+ return {Entity, LK_Extended};
+
+
+ case InitializedEntity::EK_Member:
+ // For subobjects, we look at the complete object.
+ if (Entity->getParent())
+ return getEntityLifetime(Entity->getParent(), Entity);
+
+ // except:
+ // C++17 [class.base.init]p8:
+ // A temporary expression bound to a reference member in a
+ // mem-initializer is ill-formed.
+ // C++17 [class.base.init]p11:
+ // A temporary expression bound to a reference member from a
+ // default member initializer is ill-formed.
+ //
+ // The context of p11 and its example suggest that it's only the use of a
+ // default member initializer from a constructor that makes the program
+ // ill-formed, not its mere existence, and that it can even be used by
+ // aggregate initialization.
+ return {Entity, Entity->isDefaultMemberInitializer() ? LK_Extended
+ : LK_MemInitializer};
+
+ case InitializedEntity::EK_Binding:
+ // Per [dcl.decomp]p3, the binding is treated as a variable of reference
+ // type.
+ return {Entity, LK_Extended};
+
+ case InitializedEntity::EK_Parameter:
+ case InitializedEntity::EK_Parameter_CF_Audited:
+ // -- A temporary bound to a reference parameter in a function call
+ // persists until the completion of the full-expression containing
+ // the call.
+ return {nullptr, LK_FullExpression};
+
+ case InitializedEntity::EK_TemplateParameter:
+ // FIXME: This will always be ill-formed; should we eagerly diagnose it here?
+ return {nullptr, LK_FullExpression};
+
+ case InitializedEntity::EK_Result:
+ // -- The lifetime of a temporary bound to the returned value in a
+ // function return statement is not extended; the temporary is
+ // destroyed at the end of the full-expression in the return statement.
+ return {nullptr, LK_Return};
+
+ case InitializedEntity::EK_StmtExprResult:
+ // FIXME: Should we lifetime-extend through the result of a statement
+ // expression?
+ return {nullptr, LK_StmtExprResult};
+
+ case InitializedEntity::EK_New:
+ // -- A temporary bound to a reference in a new-initializer persists
+ // until the completion of the full-expression containing the
+ // new-initializer.
+ return {nullptr, LK_New};
+
+ case InitializedEntity::EK_Temporary:
+ case InitializedEntity::EK_CompoundLiteralInit:
+ case InitializedEntity::EK_RelatedResult:
+ // We don't yet know the storage duration of the surrounding temporary.
+ // Assume it's got full-expression duration for now, it will patch up our
+ // storage duration if that's not correct.
+ return {nullptr, LK_FullExpression};
+
+ case InitializedEntity::EK_ArrayElement:
+ // For subobjects, we look at the complete object.
+ return getEntityLifetime(Entity->getParent(), InitField);
+
+ case InitializedEntity::EK_Base:
+ // For subobjects, we look at the complete object.
+ if (Entity->getParent())
+ return getEntityLifetime(Entity->getParent(), InitField);
+ return {InitField, LK_MemInitializer};
+
+ case InitializedEntity::EK_Delegating:
+ // We can reach this case for aggregate initialization in a constructor:
+ // struct A { int &&r; };
+ // struct B : A { B() : A{0} {} };
+ // In this case, use the outermost field decl as the context.
+ return {InitField, LK_MemInitializer};
+
+ case InitializedEntity::EK_BlockElement:
+ case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
+ case InitializedEntity::EK_LambdaCapture:
+ case InitializedEntity::EK_VectorElement:
+ case InitializedEntity::EK_ComplexElement:
+ return {nullptr, LK_FullExpression};
+
+ case InitializedEntity::EK_Exception:
+ // FIXME: Can we diagnose lifetime problems with exceptions?
+ return {nullptr, LK_FullExpression};
+
+ case InitializedEntity::EK_ParenAggInitMember:
+ // -- A temporary object bound to a reference element of an aggregate of
+ // class type initialized from a parenthesized expression-list
+ // [dcl.init, 9.3] persists until the completion of the full-expression
+ // containing the expression-list.
+ return {nullptr, LK_FullExpression};
+ }
+
+ llvm_unreachable("unknown entity kind");
+}
+
+namespace {
+enum ReferenceKind {
+ /// Lifetime would be extended by a reference binding to a temporary.
+ RK_ReferenceBinding,
+ /// Lifetime would be extended by a std::initializer_list object binding to
+ /// its backing array.
+ RK_StdInitializerList,
+};
+
+/// A temporary or local variable. This will be one of:
+/// * A MaterializeTemporaryExpr.
+/// * A DeclRefExpr whose declaration is a local.
+/// * An AddrLabelExpr.
+/// * A BlockExpr for a block with captures.
+using Local = Expr*;
+
+/// Expressions we stepped over when looking for the local state. Any steps
+/// that would inhibit lifetime extension or take us out of subexpressions of
+/// the initializer are included.
+struct IndirectLocalPathEntry {
+ enum EntryKind {
+ DefaultInit,
+ AddressOf,
+ VarInit,
+ LValToRVal,
+ LifetimeBoundCall,
+ TemporaryCopy,
+ LambdaCaptureInit,
+ GslReferenceInit,
+ GslPointerInit
+ } Kind;
+ Expr *E;
+ union {
+ const Decl *D = nullptr;
+ const LambdaCapture *Capture;
+ };
+ IndirectLocalPathEntry() {}
+ IndirectLocalPathEntry(EntryKind K, Expr *E) : Kind(K), E(E) {}
+ IndirectLocalPathEntry(EntryKind K, Expr *E, const Decl *D)
+ : Kind(K), E(E), D(D) {}
+ IndirectLocalPathEntry(EntryKind K, Expr *E, const LambdaCapture *Capture)
+ : Kind(K), E(E), Capture(Capture) {}
+};
+
+using IndirectLocalPath = llvm::SmallVectorImpl<IndirectLocalPathEntry>;
+
+struct RevertToOldSizeRAII {
+ IndirectLocalPath &Path;
+ unsigned OldSize = Path.size();
+ RevertToOldSizeRAII(IndirectLocalPath &Path) : Path(Path) {}
+ ~RevertToOldSizeRAII() { Path.resize(OldSize); }
+};
+
+using LocalVisitor = llvm::function_ref<bool(IndirectLocalPath &Path, Local L,
+ ReferenceKind RK)>;
+} // namespace
+
+static bool isVarOnPath(IndirectLocalPath &Path, VarDecl *VD) {
+ for (auto E : Path)
+ if (E.Kind == IndirectLocalPathEntry::VarInit && E.D == VD)
+ return true;
+ return false;
+}
+
+static bool pathContainsInit(IndirectLocalPath &Path) {
+ return llvm::any_of(Path, [=](IndirectLocalPathEntry E) {
+ return E.Kind == IndirectLocalPathEntry::DefaultInit ||
+ E.Kind == IndirectLocalPathEntry::VarInit;
+ });
+}
+
+static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
+ Expr *Init, LocalVisitor Visit,
+ bool RevisitSubinits,
+ bool EnableLifetimeWarnings);
+
+static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
+ Expr *Init, ReferenceKind RK,
+ LocalVisitor Visit,
+ bool EnableLifetimeWarnings);
+
+template <typename T> static bool isRecordWithAttr(QualType Type) {
+ if (auto *RD = Type->getAsCXXRecordDecl())
+ return RD->hasAttr<T>();
+ return false;
+}
+
+// Decl::isInStdNamespace will return false for iterators in some STL
+// implementations due to them being defined in a namespace outside of the std
+// namespace.
+static bool isInStlNamespace(const Decl *D) {
+ const DeclContext *DC = D->getDeclContext();
+ if (!DC)
+ return false;
+ if (const auto *ND = dyn_cast<NamespaceDecl>(DC))
+ if (const IdentifierInfo *II = ND->getIdentifier()) {
+ StringRef Name = II->getName();
+ if (Name.size() >= 2 && Name.front() == '_' &&
+ (Name[1] == '_' || isUppercase(Name[1])))
+ return true;
+ }
+
+ return DC->isStdNamespace();
+}
+
+static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
+ if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
+ if (isRecordWithAttr<PointerAttr>(Conv->getConversionType()))
+ return true;
+ if (!isInStlNamespace(Callee->getParent()))
+ return false;
+ if (!isRecordWithAttr<PointerAttr>(
+ Callee->getFunctionObjectParameterType()) &&
+ !isRecordWithAttr<OwnerAttr>(Callee->getFunctionObjectParameterType()))
+ return false;
+ if (Callee->getReturnType()->isPointerType() ||
+ isRecordWithAttr<PointerAttr>(Callee->getReturnType())) {
+ if (!Callee->getIdentifier())
+ return false;
+ return llvm::StringSwitch<bool>(Callee->getName())
+ .Cases("begin", "rbegin", "cbegin", "crbegin", true)
+ .Cases("end", "rend", "cend", "crend", true)
+ .Cases("c_str", "data", "get", true)
+ // Map and set types.
+ .Cases("find", "equal_range", "lower_bound", "upper_bound", true)
+ .Default(false);
+ } else if (Callee->getReturnType()->isReferenceType()) {
+ if (!Callee->getIdentifier()) {
+ auto OO = Callee->getOverloadedOperator();
+ return OO == OverloadedOperatorKind::OO_Subscript ||
+ OO == OverloadedOperatorKind::OO_Star;
+ }
+ return llvm::StringSwitch<bool>(Callee->getName())
+ .Cases("front", "back", "at", "top", "value", true)
+ .Default(false);
+ }
+ return false;
+}
+
+static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
+ if (!FD->getIdentifier() || FD->getNumParams() != 1)
+ return false;
+ const auto *RD = FD->getParamDecl(0)->getType()->getPointeeCXXRecordDecl();
+ if (!FD->isInStdNamespace() || !RD || !RD->isInStdNamespace())
+ return false;
+ if (!isRecordWithAttr<PointerAttr>(QualType(RD->getTypeForDecl(), 0)) &&
+ !isRecordWithAttr<OwnerAttr>(QualType(RD->getTypeForDecl(), 0)))
+ return false;
+ if (FD->getReturnType()->isPointerType() ||
+ isRecordWithAttr<PointerAttr>(FD->getReturnType())) {
+ return llvm::StringSwitch<bool>(FD->getName())
+ .Cases("begin", "rbegin", "cbegin", "crbegin", true)
+ .Cases("end", "rend", "cend", "crend", true)
+ .Case("data", true)
+ .Default(false);
+ } else if (FD->getReturnType()->isReferenceType()) {
+ return llvm::StringSwitch<bool>(FD->getName())
+ .Cases("get", "any_cast", true)
+ .Default(false);
+ }
+ return false;
+}
+
+static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call,
+ LocalVisitor Visit) {
+ auto VisitPointerArg = [&](const Decl *D, Expr *Arg, bool Value) {
+ // We are not interested in the temporary base objects of gsl Pointers:
+ // Temp().ptr; // Here ptr might not dangle.
+ if (isa<MemberExpr>(Arg->IgnoreImpCasts()))
+ return;
+ // Once we initialized a value with a reference, it can no longer dangle.
+ if (!Value) {
+ for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) {
+ if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit)
+ continue;
+ if (PE.Kind == IndirectLocalPathEntry::GslPointerInit)
+ return;
+ break;
+ }
+ }
+ Path.push_back({Value ? IndirectLocalPathEntry::GslPointerInit
+ : IndirectLocalPathEntry::GslReferenceInit,
+ Arg, D});
+ if (Arg->isGLValue())
+ visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
+ Visit,
+ /*EnableLifetimeWarnings=*/true);
+ else
+ visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
+ /*EnableLifetimeWarnings=*/true);
+ Path.pop_back();
+ };
+
+ if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) {
+ const auto *MD = cast_or_null<CXXMethodDecl>(MCE->getDirectCallee());
+ if (MD && shouldTrackImplicitObjectArg(MD))
+ VisitPointerArg(MD, MCE->getImplicitObjectArgument(),
+ !MD->getReturnType()->isReferenceType());
+ return;
+ } else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(Call)) {
+ FunctionDecl *Callee = OCE->getDirectCallee();
+ if (Callee && Callee->isCXXInstanceMember() &&
+ shouldTrackImplicitObjectArg(cast<CXXMethodDecl>(Callee)))
+ VisitPointerArg(Callee, OCE->getArg(0),
+ !Callee->getReturnType()->isReferenceType());
+ return;
+ } else if (auto *CE = dyn_cast<CallExpr>(Call)) {
+ FunctionDecl *Callee = CE->getDirectCallee();
+ if (Callee && shouldTrackFirstArgument(Callee))
+ VisitPointerArg(Callee, CE->getArg(0),
+ !Callee->getReturnType()->isReferenceType());
+ return;
+ }
+
+ if (auto *CCE = dyn_cast<CXXConstructExpr>(Call)) {
+ const auto *Ctor = CCE->getConstructor();
+ const CXXRecordDecl *RD = Ctor->getParent();
+ if (CCE->getNumArgs() > 0 && RD->hasAttr<PointerAttr>())
+ VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0], true);
+ }
+}
+
+static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
+ const TypeSourceInfo *TSI = FD->getTypeSourceInfo();
+ if (!TSI)
+ return false;
+ // Don't declare this variable in the second operand of the for-statement;
+ // GCC miscompiles that by ending its lifetime before evaluating the
+ // third operand. See gcc.gnu.org/PR86769.
+ AttributedTypeLoc ATL;
+ for (TypeLoc TL = TSI->getTypeLoc();
+ (ATL = TL.getAsAdjusted<AttributedTypeLoc>());
+ TL = ATL.getModifiedLoc()) {
+ if (ATL.getAttrAs<LifetimeBoundAttr>())
+ return true;
+ }
+
+ // Assume that all assignment operators with a "normal" return type return
+ // *this, that is, an lvalue reference that is the same type as the implicit
+ // object parameter (or the LHS for a non-member operator$=).
+ OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator();
+ if (OO == OO_Equal || isCompoundAssignmentOperator(OO)) {
+ QualType RetT = FD->getReturnType();
+ if (RetT->isLValueReferenceType()) {
+ ASTContext &Ctx = FD->getASTContext();
+ QualType LHST;
+ auto *MD = dyn_cast<CXXMethodDecl>(FD);
+ if (MD && MD->isCXXInstanceMember())
+ LHST = Ctx.getLValueReferenceType(MD->getFunctionObjectParameterType());
+ else
+ LHST = MD->getParamDecl(0)->getType();
+ if (Ctx.hasSameType(RetT, LHST))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void visitLifetimeBoundArguments(IndirectLocalPath &Path, Expr *Call,
+ LocalVisitor Visit) {
+ const FunctionDecl *Callee;
+ ArrayRef<Expr*> Args;
+
+ if (auto *CE = dyn_cast<CallExpr>(Call)) {
+ Callee = CE->getDirectCallee();
+ Args = llvm::ArrayRef(CE->getArgs(), CE->getNumArgs());
+ } else {
+ auto *CCE = cast<CXXConstructExpr>(Call);
+ Callee = CCE->getConstructor();
+ Args = llvm::ArrayRef(CCE->getArgs(), CCE->getNumArgs());
+ }
+ if (!Callee)
+ return;
+
+ Expr *ObjectArg = nullptr;
+ if (isa<CXXOperatorCallExpr>(Call) && Callee->isCXXInstanceMember()) {
+ ObjectArg = Args[0];
+ Args = Args.slice(1);
+ } else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) {
+ ObjectArg = MCE->getImplicitObjectArgument();
+ }
+
+ auto VisitLifetimeBoundArg = [&](const Decl *D, Expr *Arg) {
+ Path.push_back({IndirectLocalPathEntry::LifetimeBoundCall, Arg, D});
+ if (Arg->isGLValue())
+ visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
+ ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/96475
More information about the cfe-commits
mailing list