[clang] [clang][Sema] Move the initializer lifetime checking code from SemaInit.cpp to a new place, NFC (PR #96758)

via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 26 05:32:01 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Haojian Wu (hokein)

<details>
<summary>Changes</summary>

This is a refactoring change for better code isolation and reuse, the first step to extend it for assignments.

---

Patch is 102.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96758.diff


4 Files Affected:

- (modified) clang/lib/Sema/CMakeLists.txt (+1) 
- (added) clang/lib/Sema/CheckExprLifetime.cpp (+1258) 
- (added) clang/lib/Sema/CheckExprLifetime.h (+29) 
- (modified) clang/lib/Sema/SemaInit.cpp (+2-1229) 


``````````diff
diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt
index f152d243d39a5..980a83d4431aa 100644
--- a/clang/lib/Sema/CMakeLists.txt
+++ b/clang/lib/Sema/CMakeLists.txt
@@ -15,6 +15,7 @@ clang_tablegen(OpenCLBuiltins.inc -gen-clang-opencl-builtins
 
 add_clang_library(clangSema
   AnalysisBasedWarnings.cpp
+  CheckExprLifetime.cpp
   CodeCompleteConsumer.cpp
   DeclSpec.cpp
   DelayedDiagnostic.cpp
diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
new file mode 100644
index 0000000000000..2b44330ef0336
--- /dev/null
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -0,0 +1,1258 @@
+//===--- CheckExprLifetime.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CheckExprLifetime.h"
+#include "clang/AST/Expr.h"
+#include "clang/Sema/Sema.h"
+#include "llvm/ADT/PointerIntPair.h"
+
+namespace clang::sema {
+namespace {
+enum LifetimeKind {
+  /// The lifetime of a temporary bound to this entity ends at the end of the
+  /// full-expression, and that's (probably) fine.
+  LK_FullExpression,
+
+  /// The lifetime of a temporary bound to this entity is extended to the
+  /// lifeitme of the entity itself.
+  LK_Extended,
+
+  /// The lifetime of a temporary bound to this entity probably ends too soon,
+  /// because the entity is allocated in a new-expression.
+  LK_New,
+
+  /// The lifetime of a temporary bound to this entity ends too soon, because
+  /// the entity is a return object.
+  LK_Return,
+
+  /// The lifetime of a temporary bound to this entity ends too soon, because
+  /// the entity is the result of a statement expression.
+  LK_StmtExprResult,
+
+  /// This is a mem-initializer: if it would extend a temporary (other than via
+  /// a default member initializer), the program is ill-formed.
+  LK_MemInitializer,
+};
+using LifetimeResult =
+    llvm::PointerIntPair<const InitializedEntity *, 3, LifetimeKind>;
+}
+
+/// Determine the declaration which an initialized entity ultimately refers to,
+/// for the purpose of lifetime-extending a temporary bound to a reference in
+/// the initialization of \p Entity.
+static LifetimeResult getEntityLifetime(
+    const InitializedEntity *Entity,
+    const InitializedEntity *InitField = nullptr) {
+  // C++11 [class.temporary]p5:
+  switch (Entity->getKind()) {
+  case InitializedEntity::EK_Variable:
+    //   The temporary [...] persists for the lifetime of the reference
+    return {Entity, LK_Extended};
+
+
+  case InitializedEntity::EK_Member:
+    // For subobjects, we look at the complete object.
+    if (Entity->getParent())
+      return getEntityLifetime(Entity->getParent(), Entity);
+
+    //   except:
+    // C++17 [class.base.init]p8:
+    //   A temporary expression bound to a reference member in a
+    //   mem-initializer is ill-formed.
+    // C++17 [class.base.init]p11:
+    //   A temporary expression bound to a reference member from a
+    //   default member initializer is ill-formed.
+    //
+    // The context of p11 and its example suggest that it's only the use of a
+    // default member initializer from a constructor that makes the program
+    // ill-formed, not its mere existence, and that it can even be used by
+    // aggregate initialization.
+    return {Entity, Entity->isDefaultMemberInitializer() ? LK_Extended
+                                                         : LK_MemInitializer};
+
+  case InitializedEntity::EK_Binding:
+    // Per [dcl.decomp]p3, the binding is treated as a variable of reference
+    // type.
+    return {Entity, LK_Extended};
+
+  case InitializedEntity::EK_Parameter:
+  case InitializedEntity::EK_Parameter_CF_Audited:
+    //   -- A temporary bound to a reference parameter in a function call
+    //      persists until the completion of the full-expression containing
+    //      the call.
+    return {nullptr, LK_FullExpression};
+
+  case InitializedEntity::EK_TemplateParameter:
+    // FIXME: This will always be ill-formed; should we eagerly diagnose it here?
+    return {nullptr, LK_FullExpression};
+
+  case InitializedEntity::EK_Result:
+    //   -- The lifetime of a temporary bound to the returned value in a
+    //      function return statement is not extended; the temporary is
+    //      destroyed at the end of the full-expression in the return statement.
+    return {nullptr, LK_Return};
+
+  case InitializedEntity::EK_StmtExprResult:
+    // FIXME: Should we lifetime-extend through the result of a statement
+    // expression?
+    return {nullptr, LK_StmtExprResult};
+
+  case InitializedEntity::EK_New:
+    //   -- A temporary bound to a reference in a new-initializer persists
+    //      until the completion of the full-expression containing the
+    //      new-initializer.
+    return {nullptr, LK_New};
+
+  case InitializedEntity::EK_Temporary:
+  case InitializedEntity::EK_CompoundLiteralInit:
+  case InitializedEntity::EK_RelatedResult:
+    // We don't yet know the storage duration of the surrounding temporary.
+    // Assume it's got full-expression duration for now, it will patch up our
+    // storage duration if that's not correct.
+    return {nullptr, LK_FullExpression};
+
+  case InitializedEntity::EK_ArrayElement:
+    // For subobjects, we look at the complete object.
+    return getEntityLifetime(Entity->getParent(), InitField);
+
+  case InitializedEntity::EK_Base:
+    // For subobjects, we look at the complete object.
+    if (Entity->getParent())
+      return getEntityLifetime(Entity->getParent(), InitField);
+    return {InitField, LK_MemInitializer};
+
+  case InitializedEntity::EK_Delegating:
+    // We can reach this case for aggregate initialization in a constructor:
+    //   struct A { int &&r; };
+    //   struct B : A { B() : A{0} {} };
+    // In this case, use the outermost field decl as the context.
+    return {InitField, LK_MemInitializer};
+
+  case InitializedEntity::EK_BlockElement:
+  case InitializedEntity::EK_LambdaToBlockConversionBlockElement:
+  case InitializedEntity::EK_LambdaCapture:
+  case InitializedEntity::EK_VectorElement:
+  case InitializedEntity::EK_ComplexElement:
+    return {nullptr, LK_FullExpression};
+
+  case InitializedEntity::EK_Exception:
+    // FIXME: Can we diagnose lifetime problems with exceptions?
+    return {nullptr, LK_FullExpression};
+
+  case InitializedEntity::EK_ParenAggInitMember:
+    //   -- A temporary object bound to a reference element of an aggregate of
+    //      class type initialized from a parenthesized expression-list
+    //      [dcl.init, 9.3] persists until the completion of the full-expression
+    //      containing the expression-list.
+    return {nullptr, LK_FullExpression};
+  }
+
+  llvm_unreachable("unknown entity kind");
+}
+
+namespace {
+enum ReferenceKind {
+  /// Lifetime would be extended by a reference binding to a temporary.
+  RK_ReferenceBinding,
+  /// Lifetime would be extended by a std::initializer_list object binding to
+  /// its backing array.
+  RK_StdInitializerList,
+};
+
+/// A temporary or local variable. This will be one of:
+///  * A MaterializeTemporaryExpr.
+///  * A DeclRefExpr whose declaration is a local.
+///  * An AddrLabelExpr.
+///  * A BlockExpr for a block with captures.
+using Local = Expr*;
+
+/// Expressions we stepped over when looking for the local state. Any steps
+/// that would inhibit lifetime extension or take us out of subexpressions of
+/// the initializer are included.
+struct IndirectLocalPathEntry {
+  enum EntryKind {
+    DefaultInit,
+    AddressOf,
+    VarInit,
+    LValToRVal,
+    LifetimeBoundCall,
+    TemporaryCopy,
+    LambdaCaptureInit,
+    GslReferenceInit,
+    GslPointerInit
+  } Kind;
+  Expr *E;
+  union {
+    const Decl *D = nullptr;
+    const LambdaCapture *Capture;
+  };
+  IndirectLocalPathEntry() {}
+  IndirectLocalPathEntry(EntryKind K, Expr *E) : Kind(K), E(E) {}
+  IndirectLocalPathEntry(EntryKind K, Expr *E, const Decl *D)
+      : Kind(K), E(E), D(D) {}
+  IndirectLocalPathEntry(EntryKind K, Expr *E, const LambdaCapture *Capture)
+      : Kind(K), E(E), Capture(Capture) {}
+};
+
+using IndirectLocalPath = llvm::SmallVectorImpl<IndirectLocalPathEntry>;
+
+struct RevertToOldSizeRAII {
+  IndirectLocalPath &Path;
+  unsigned OldSize = Path.size();
+  RevertToOldSizeRAII(IndirectLocalPath &Path) : Path(Path) {}
+  ~RevertToOldSizeRAII() { Path.resize(OldSize); }
+};
+
+using LocalVisitor = llvm::function_ref<bool(IndirectLocalPath &Path, Local L,
+                                             ReferenceKind RK)>;
+}
+
+static bool isVarOnPath(IndirectLocalPath &Path, VarDecl *VD) {
+  for (auto E : Path)
+    if (E.Kind == IndirectLocalPathEntry::VarInit && E.D == VD)
+      return true;
+  return false;
+}
+
+static bool pathContainsInit(IndirectLocalPath &Path) {
+  return llvm::any_of(Path, [=](IndirectLocalPathEntry E) {
+    return E.Kind == IndirectLocalPathEntry::DefaultInit ||
+           E.Kind == IndirectLocalPathEntry::VarInit;
+  });
+}
+
+static void visitLocalsRetainedByInitializer(IndirectLocalPath &Path,
+                                             Expr *Init, LocalVisitor Visit,
+                                             bool RevisitSubinits,
+                                             bool EnableLifetimeWarnings);
+
+static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
+                                                  Expr *Init, ReferenceKind RK,
+                                                  LocalVisitor Visit,
+                                                  bool EnableLifetimeWarnings);
+
+template <typename T> static bool isRecordWithAttr(QualType Type) {
+  if (auto *RD = Type->getAsCXXRecordDecl())
+    return RD->hasAttr<T>();
+  return false;
+}
+
+// Decl::isInStdNamespace will return false for iterators in some STL
+// implementations due to them being defined in a namespace outside of the std
+// namespace.
+static bool isInStlNamespace(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+  if (!DC)
+    return false;
+  if (const auto *ND = dyn_cast<NamespaceDecl>(DC))
+    if (const IdentifierInfo *II = ND->getIdentifier()) {
+      StringRef Name = II->getName();
+      if (Name.size() >= 2 && Name.front() == '_' &&
+          (Name[1] == '_' || isUppercase(Name[1])))
+        return true;
+    }
+
+  return DC->isStdNamespace();
+}
+
+static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
+  if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
+    if (isRecordWithAttr<PointerAttr>(Conv->getConversionType()))
+      return true;
+  if (!isInStlNamespace(Callee->getParent()))
+    return false;
+  if (!isRecordWithAttr<PointerAttr>(
+          Callee->getFunctionObjectParameterType()) &&
+      !isRecordWithAttr<OwnerAttr>(Callee->getFunctionObjectParameterType()))
+    return false;
+  if (Callee->getReturnType()->isPointerType() ||
+      isRecordWithAttr<PointerAttr>(Callee->getReturnType())) {
+    if (!Callee->getIdentifier())
+      return false;
+    return llvm::StringSwitch<bool>(Callee->getName())
+        .Cases("begin", "rbegin", "cbegin", "crbegin", true)
+        .Cases("end", "rend", "cend", "crend", true)
+        .Cases("c_str", "data", "get", true)
+        // Map and set types.
+        .Cases("find", "equal_range", "lower_bound", "upper_bound", true)
+        .Default(false);
+  } else if (Callee->getReturnType()->isReferenceType()) {
+    if (!Callee->getIdentifier()) {
+      auto OO = Callee->getOverloadedOperator();
+      return OO == OverloadedOperatorKind::OO_Subscript ||
+             OO == OverloadedOperatorKind::OO_Star;
+    }
+    return llvm::StringSwitch<bool>(Callee->getName())
+        .Cases("front", "back", "at", "top", "value", true)
+        .Default(false);
+  }
+  return false;
+}
+
+static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
+  if (!FD->getIdentifier() || FD->getNumParams() != 1)
+    return false;
+  const auto *RD = FD->getParamDecl(0)->getType()->getPointeeCXXRecordDecl();
+  if (!FD->isInStdNamespace() || !RD || !RD->isInStdNamespace())
+    return false;
+  if (!isRecordWithAttr<PointerAttr>(QualType(RD->getTypeForDecl(), 0)) &&
+      !isRecordWithAttr<OwnerAttr>(QualType(RD->getTypeForDecl(), 0)))
+    return false;
+  if (FD->getReturnType()->isPointerType() ||
+      isRecordWithAttr<PointerAttr>(FD->getReturnType())) {
+    return llvm::StringSwitch<bool>(FD->getName())
+        .Cases("begin", "rbegin", "cbegin", "crbegin", true)
+        .Cases("end", "rend", "cend", "crend", true)
+        .Case("data", true)
+        .Default(false);
+  } else if (FD->getReturnType()->isReferenceType()) {
+    return llvm::StringSwitch<bool>(FD->getName())
+        .Cases("get", "any_cast", true)
+        .Default(false);
+  }
+  return false;
+}
+
+static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call,
+                                    LocalVisitor Visit) {
+  auto VisitPointerArg = [&](const Decl *D, Expr *Arg, bool Value) {
+    // We are not interested in the temporary base objects of gsl Pointers:
+    //   Temp().ptr; // Here ptr might not dangle.
+    if (isa<MemberExpr>(Arg->IgnoreImpCasts()))
+      return;
+    // Once we initialized a value with a reference, it can no longer dangle.
+    if (!Value) {
+      for (const IndirectLocalPathEntry &PE : llvm::reverse(Path)) {
+        if (PE.Kind == IndirectLocalPathEntry::GslReferenceInit)
+          continue;
+        if (PE.Kind == IndirectLocalPathEntry::GslPointerInit)
+          return;
+        break;
+      }
+    }
+    Path.push_back({Value ? IndirectLocalPathEntry::GslPointerInit
+                          : IndirectLocalPathEntry::GslReferenceInit,
+                    Arg, D});
+    if (Arg->isGLValue())
+      visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
+                                            Visit,
+                                            /*EnableLifetimeWarnings=*/true);
+    else
+      visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
+                                       /*EnableLifetimeWarnings=*/true);
+    Path.pop_back();
+  };
+
+  if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) {
+    const auto *MD = cast_or_null<CXXMethodDecl>(MCE->getDirectCallee());
+    if (MD && shouldTrackImplicitObjectArg(MD))
+      VisitPointerArg(MD, MCE->getImplicitObjectArgument(),
+                      !MD->getReturnType()->isReferenceType());
+    return;
+  } else if (auto *OCE = dyn_cast<CXXOperatorCallExpr>(Call)) {
+    FunctionDecl *Callee = OCE->getDirectCallee();
+    if (Callee && Callee->isCXXInstanceMember() &&
+        shouldTrackImplicitObjectArg(cast<CXXMethodDecl>(Callee))) 
+      VisitPointerArg(Callee, OCE->getArg(0),
+                      !Callee->getReturnType()->isReferenceType()); 
+    return;
+  } else if (auto *CE = dyn_cast<CallExpr>(Call)) {
+    FunctionDecl *Callee = CE->getDirectCallee();
+    if (Callee && shouldTrackFirstArgument(Callee))
+      VisitPointerArg(Callee, CE->getArg(0),
+                      !Callee->getReturnType()->isReferenceType());
+    return;
+  }
+
+  if (auto *CCE = dyn_cast<CXXConstructExpr>(Call)) {
+    const auto *Ctor = CCE->getConstructor();
+    const CXXRecordDecl *RD = Ctor->getParent();
+    if (CCE->getNumArgs() > 0 && RD->hasAttr<PointerAttr>())
+      VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0], true);
+  }
+}
+
+static bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
+  const TypeSourceInfo *TSI = FD->getTypeSourceInfo();
+  if (!TSI)
+    return false;
+  // Don't declare this variable in the second operand of the for-statement;
+  // GCC miscompiles that by ending its lifetime before evaluating the
+  // third operand. See gcc.gnu.org/PR86769.
+  AttributedTypeLoc ATL;
+  for (TypeLoc TL = TSI->getTypeLoc();
+       (ATL = TL.getAsAdjusted<AttributedTypeLoc>());
+       TL = ATL.getModifiedLoc()) {
+    if (ATL.getAttrAs<LifetimeBoundAttr>())
+      return true;
+  }
+
+  // Assume that all assignment operators with a "normal" return type return
+  // *this, that is, an lvalue reference that is the same type as the implicit
+  // object parameter (or the LHS for a non-member operator$=).
+  OverloadedOperatorKind OO = FD->getDeclName().getCXXOverloadedOperator();
+  if (OO == OO_Equal || isCompoundAssignmentOperator(OO)) {
+    QualType RetT = FD->getReturnType();
+    if (RetT->isLValueReferenceType()) {
+      ASTContext &Ctx = FD->getASTContext();
+      QualType LHST;
+      auto *MD = dyn_cast<CXXMethodDecl>(FD);
+      if (MD && MD->isCXXInstanceMember())
+        LHST = Ctx.getLValueReferenceType(MD->getFunctionObjectParameterType());
+      else
+        LHST = MD->getParamDecl(0)->getType();
+      if (Ctx.hasSameType(RetT, LHST))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+static void visitLifetimeBoundArguments(IndirectLocalPath &Path, Expr *Call,
+                                        LocalVisitor Visit) {
+  const FunctionDecl *Callee;
+  ArrayRef<Expr*> Args;
+
+  if (auto *CE = dyn_cast<CallExpr>(Call)) {
+    Callee = CE->getDirectCallee();
+    Args = llvm::ArrayRef(CE->getArgs(), CE->getNumArgs());
+  } else {
+    auto *CCE = cast<CXXConstructExpr>(Call);
+    Callee = CCE->getConstructor();
+    Args = llvm::ArrayRef(CCE->getArgs(), CCE->getNumArgs());
+  }
+  if (!Callee)
+    return;
+
+  Expr *ObjectArg = nullptr;
+  if (isa<CXXOperatorCallExpr>(Call) && Callee->isCXXInstanceMember()) {
+    ObjectArg = Args[0];
+    Args = Args.slice(1);
+  } else if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Call)) {
+    ObjectArg = MCE->getImplicitObjectArgument();
+  }
+
+  auto VisitLifetimeBoundArg = [&](const Decl *D, Expr *Arg) {
+    Path.push_back({IndirectLocalPathEntry::LifetimeBoundCall, Arg, D});
+    if (Arg->isGLValue())
+      visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding,
+                                            Visit,
+                                            /*EnableLifetimeWarnings=*/false);
+    else
+      visitLocalsRetainedByInitializer(Path, Arg, Visit, true,
+                                       /*EnableLifetimeWarnings=*/false);
+    Path.pop_back();
+  };
+
+  bool CheckCoroCall = false;
+  if (const auto *RD = Callee->getReturnType()->getAsRecordDecl()) {
+    CheckCoroCall = RD->hasAttr<CoroLifetimeBoundAttr>() &&
+                    RD->hasAttr<CoroReturnTypeAttr>() &&
+                    !Callee->hasAttr<CoroDisableLifetimeBoundAttr>();
+  }
+
+  if (ObjectArg) {
+    bool CheckCoroObjArg = CheckCoroCall;
+    // Coroutine lambda objects with empty capture list are not lifetimebound.
+    if (auto *LE = dyn_cast<LambdaExpr>(ObjectArg->IgnoreImplicit());
+        LE && LE->captures().empty())
+      CheckCoroObjArg = false;
+    // Allow `get_return_object()` as the object param (__promise) is not
+    // lifetimebound.
+    if (Sema::CanBeGetReturnObject(Callee))
+      CheckCoroObjArg = false;
+    if (implicitObjectParamIsLifetimeBound(Callee) || CheckCoroObjArg)
+      VisitLifetimeBoundArg(Callee, ObjectArg);
+  }
+
+  for (unsigned I = 0,
+                N = std::min<unsigned>(Callee->getNumParams(), Args.size());
+       I != N; ++I) {
+    if (CheckCoroCall || Callee->getParamDecl(I)->hasAttr<LifetimeBoundAttr>())
+      VisitLifetimeBoundArg(Callee->getParamDecl(I), Args[I]);
+  }
+}
+
+/// Visit the locals that would be reachable through a reference bound to the
+/// glvalue expression \c Init.
+static void visitLocalsRetainedByReferenceBinding(IndirectLocalPath &Path,
+                                                  Expr *Init, ReferenceKind RK,
+                                                  LocalVisitor Visit,
+                                                  bool EnableLifetimeWarnings) {
+  RevertToOldSizeRAII RAII(Path);
+
+  // Walk past any constructs which we can lifetime-extend across.
+  Expr *Old;
+  do {
+    Old = Init;
+
+    if (auto *FE = dyn_cast<FullExpr>(Init))
+      Init = FE->getSubExpr();
+
+    if (InitListExpr *ILE = dyn_cast<InitListExpr>(Init)) {
+      // If this is just redundant braces around an in...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/96758


More information about the cfe-commits mailing list