[clang] [clang] Fix self-capturing `__block` variables (PR #89475)

via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 4 12:50:45 PDT 2024


https://github.com/ille-apple updated https://github.com/llvm/llvm-project/pull/89475

>From 26be4b6332bf6b58b3d99bb0065b854dcce2a944 Mon Sep 17 00:00:00 2001
From: ille-apple <ille at apple.com>
Date: Fri, 19 Apr 2024 15:36:44 -0700
Subject: [PATCH 1/2] [clang] Fix self-capturing `__block` variables

Clang special-cases `__block` variables whose initializer contains a
block literal that captures the variable itself.  But this special case
doesn't work for variables of record types, causing Clang to emit broken
code.

Fix this by initializing such variables directly on the heap, and add a
warning for when this needs to be done.

rdar://70498802
---
 clang/include/clang/AST/Decl.h                |  22 ++-
 .../clang/Basic/DiagnosticSemaKinds.td        |   6 +
 clang/lib/AST/Decl.cpp                        |  13 ++
 clang/lib/CodeGen/CGBlocks.cpp                |  89 ++++++++----
 clang/lib/CodeGen/CGBlocks.h                  |   1 +
 clang/lib/CodeGen/CGDecl.cpp                  | 116 +++++-----------
 clang/lib/CodeGen/CodeGenFunction.h           |  12 +-
 clang/lib/CodeGen/CodeGenModule.h             |   8 +-
 clang/lib/Sema/Sema.cpp                       |  66 +++++++++
 clang/lib/Serialization/ASTReaderDecl.cpp     |   1 +
 clang/lib/Serialization/ASTWriterDecl.cpp     |   6 +-
 clang/test/CodeGenCXX/block-capture.cpp       | 127 +++++++++++++++++-
 12 files changed, 347 insertions(+), 120 deletions(-)

diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 5e485ccb85a13..83c54144366b5 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -1103,6 +1103,11 @@ class VarDecl : public DeclaratorDecl, public Redeclarable<VarDecl> {
 
     LLVM_PREFERRED_TYPE(bool)
     unsigned IsCXXCondDecl : 1;
+
+    /// Whether this is a __block variable that is captured by a block
+    /// referenced in its own initializer.
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned CapturedByOwnInit : 1;
   };
 
   union {
@@ -1588,10 +1593,23 @@ class VarDecl : public DeclaratorDecl, public Redeclarable<VarDecl> {
   /// escaping block.
   bool isNonEscapingByref() const;
 
-  void setEscapingByref() {
-    NonParmVarDeclBits.EscapingByref = true;
+  void setEscapingByref();
+
+  /// Indicates this is a __block variable that is captured by a block
+  /// referenced in its own initializer.
+  bool isCapturedByOwnInit() const {
+    return !isa<ParmVarDecl>(this) && NonParmVarDeclBits.CapturedByOwnInit;
   }
 
+  void setCapturedByOwnInit() {
+    assert(!isa<ParmVarDecl>(this));
+    NonParmVarDeclBits.CapturedByOwnInit = true;
+  }
+
+  /// Check if this is a __block variable which needs to be initialized
+  /// directly on the heap.
+  bool needsInitOnHeap() const;
+
   bool isCXXCondDecl() const {
     return isa<ParmVarDecl>(this) ? false : NonParmVarDeclBits.IsCXXCondDecl;
   }
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 5a32463763aa6..c1b214fa28f07 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2358,6 +2358,12 @@ def note_var_fixit_add_initialization : Note<
 def note_uninit_fixit_remove_cond : Note<
   "remove the %select{'%1' if its condition|condition if it}0 "
   "is always %select{false|true}2">;
+def warn_implicit_block_var_alloc : Warning<
+  "variable %q0 will be initialized in a heap allocation">,
+  InGroup<DiagGroup<"implicit-block-var-alloc">>, DefaultIgnore;
+def note_because_captured_by_block : Note<
+  "because it is captured by a block used in its own initializer">;
+
 def err_init_incomplete_type : Error<"initialization of incomplete type %0">;
 def err_list_init_in_parens : Error<
   "cannot initialize %select{non-class|reference}0 type %1 with a "
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 41fbfe281ef65..3a584849160a8 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -2680,6 +2680,19 @@ bool VarDecl::isNonEscapingByref() const {
   return hasAttr<BlocksAttr>() && !NonParmVarDeclBits.EscapingByref;
 }
 
+void VarDecl::setEscapingByref() {
+  assert(!isa<ParmVarDecl>(this) && hasAttr<BlocksAttr>());
+  NonParmVarDeclBits.EscapingByref = true;
+}
+
+bool VarDecl::needsInitOnHeap() const {
+  // We need direct initialization on the heap for self-capturing __block
+  // variables of types that cause CodeGenFunction::getEvaluationKind to return
+  // TEK_Aggregate.  The only such types that can be captured are records.
+  return isCapturedByOwnInit() &&
+         getType().getAtomicUnqualifiedType()->isRecordType();
+}
+
 bool VarDecl::hasDependentAlignment() const {
   QualType T = getType();
   return T->isDependentType() || T->isUndeducedType() ||
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index bf50f2025de57..2b381a5301180 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -45,6 +45,13 @@ CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
     name = name.substr(1);
 }
 
+BlockByrefHelpers::BlockByrefHelpers(const BlockByrefInfo &info)
+    : CopyHelper(nullptr), DisposeHelper(nullptr),
+      // The alignment we care about for the purposes of uniquing byref
+      // helpers is the alignment of the actual byref value field.
+      Alignment(info.ByrefAlignment.alignmentAtOffset(info.FieldOffset)),
+      ForceNoopCopy(info.ForceNoopCopy) {}
+
 // Anchor the vtable to this translation unit.
 BlockByrefHelpers::~BlockByrefHelpers() {}
 
@@ -2127,8 +2134,8 @@ class ObjectByrefHelpers final : public BlockByrefHelpers {
   BlockFieldFlags Flags;
 
 public:
-  ObjectByrefHelpers(CharUnits alignment, BlockFieldFlags flags)
-    : BlockByrefHelpers(alignment), Flags(flags) {}
+  ObjectByrefHelpers(const BlockByrefInfo &info, BlockFieldFlags flags)
+      : BlockByrefHelpers(info), Flags(flags) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2161,7 +2168,7 @@ class ObjectByrefHelpers final : public BlockByrefHelpers {
 /// Emits the copy/dispose helpers for an ARC __block __weak variable.
 class ARCWeakByrefHelpers final : public BlockByrefHelpers {
 public:
-  ARCWeakByrefHelpers(CharUnits alignment) : BlockByrefHelpers(alignment) {}
+  ARCWeakByrefHelpers(const BlockByrefInfo &info) : BlockByrefHelpers(info) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2182,7 +2189,7 @@ class ARCWeakByrefHelpers final : public BlockByrefHelpers {
 /// that's not of block-pointer type.
 class ARCStrongByrefHelpers final : public BlockByrefHelpers {
 public:
-  ARCStrongByrefHelpers(CharUnits alignment) : BlockByrefHelpers(alignment) {}
+  ARCStrongByrefHelpers(const BlockByrefInfo &info) : BlockByrefHelpers(info) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2218,8 +2225,8 @@ class ARCStrongByrefHelpers final : public BlockByrefHelpers {
 /// variable that's of block-pointer type.
 class ARCStrongBlockByrefHelpers final : public BlockByrefHelpers {
 public:
-  ARCStrongBlockByrefHelpers(CharUnits alignment)
-    : BlockByrefHelpers(alignment) {}
+  ARCStrongBlockByrefHelpers(const BlockByrefInfo &info)
+      : BlockByrefHelpers(info) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2248,9 +2255,9 @@ class CXXByrefHelpers final : public BlockByrefHelpers {
   const Expr *CopyExpr;
 
 public:
-  CXXByrefHelpers(CharUnits alignment, QualType type,
+  CXXByrefHelpers(const BlockByrefInfo &info, QualType type,
                   const Expr *copyExpr)
-    : BlockByrefHelpers(alignment), VarType(type), CopyExpr(copyExpr) {}
+      : BlockByrefHelpers(info), VarType(type), CopyExpr(copyExpr) {}
 
   bool needsCopy() const override { return CopyExpr != nullptr; }
   void emitCopy(CodeGenFunction &CGF, Address destField,
@@ -2276,8 +2283,8 @@ class NonTrivialCStructByrefHelpers final : public BlockByrefHelpers {
   QualType VarType;
 
 public:
-  NonTrivialCStructByrefHelpers(CharUnits alignment, QualType type)
-    : BlockByrefHelpers(alignment), VarType(type) {}
+  NonTrivialCStructByrefHelpers(const BlockByrefInfo &info, QualType type)
+      : BlockByrefHelpers(info), VarType(type) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2336,7 +2343,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
     // Create a scope with an artificial location for the body of this function.
   auto AL = ApplyDebugLocation::CreateArtificial(CGF);
 
-  if (generator.needsCopy()) {
+  if (generator.needsCopy() && !generator.ForceNoopCopy) {
     // dst->x
     Address destField = CGF.GetAddrOfLocalVar(&Dst);
     destField = Address(CGF.Builder.CreateLoad(destField), byrefInfo.Type,
@@ -2458,26 +2465,21 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
 
   auto &byrefInfo = getBlockByrefInfo(&var);
 
-  // The alignment we care about for the purposes of uniquing byref
-  // helpers is the alignment of the actual byref value field.
-  CharUnits valueAlignment =
-    byrefInfo.ByrefAlignment.alignmentAtOffset(byrefInfo.FieldOffset);
-
   if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
     const Expr *copyExpr =
         CGM.getContext().getBlockVarCopyInit(&var).getCopyExpr();
     if (!copyExpr && record->hasTrivialDestructor()) return nullptr;
 
-    return ::buildByrefHelpers(
-        CGM, byrefInfo, CXXByrefHelpers(valueAlignment, type, copyExpr));
+    return ::buildByrefHelpers(CGM, byrefInfo,
+                               CXXByrefHelpers(byrefInfo, type, copyExpr));
   }
 
   // If type is a non-trivial C struct type that is non-trivial to
   // destructly move or destroy, build the copy and dispose helpers.
   if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct ||
       type.isDestructedType() == QualType::DK_nontrivial_c_struct)
-    return ::buildByrefHelpers(
-        CGM, byrefInfo, NonTrivialCStructByrefHelpers(valueAlignment, type));
+    return ::buildByrefHelpers(CGM, byrefInfo,
+                               NonTrivialCStructByrefHelpers(byrefInfo, type));
 
   // Otherwise, if we don't have a retainable type, there's nothing to do.
   // that the runtime does extra copies.
@@ -2499,7 +2501,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
     // byref routines.
     case Qualifiers::OCL_Weak:
       return ::buildByrefHelpers(CGM, byrefInfo,
-                                 ARCWeakByrefHelpers(valueAlignment));
+                                 ARCWeakByrefHelpers(byrefInfo));
 
     // ARC __strong __block variables need to be retained.
     case Qualifiers::OCL_Strong:
@@ -2507,13 +2509,13 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
       // transfer possible.
       if (type->isBlockPointerType()) {
         return ::buildByrefHelpers(CGM, byrefInfo,
-                                   ARCStrongBlockByrefHelpers(valueAlignment));
+                                   ARCStrongBlockByrefHelpers(byrefInfo));
 
-      // Otherwise, we transfer ownership of the retain from the stack
-      // to the heap.
+        // Otherwise, we transfer ownership of the retain from the stack
+        // to the heap.
       } else {
         return ::buildByrefHelpers(CGM, byrefInfo,
-                                   ARCStrongByrefHelpers(valueAlignment));
+                                   ARCStrongByrefHelpers(byrefInfo));
       }
     }
     llvm_unreachable("fell out of lifetime switch!");
@@ -2533,7 +2535,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
     flags |= BLOCK_FIELD_IS_WEAK;
 
   return ::buildByrefHelpers(CGM, byrefInfo,
-                             ObjectByrefHelpers(valueAlignment, flags));
+                             ObjectByrefHelpers(byrefInfo, flags));
 }
 
 Address CodeGenFunction::emitBlockByrefAddress(Address baseAddr,
@@ -2653,11 +2655,39 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
   info.FieldOffset = varOffset;
   info.ByrefAlignment = std::max(varAlign, getPointerAlign());
 
+  // If we're initializing directly on the heap, then we should emit a
+  // no-op copy helper, both because we don't need a real one (the
+  // object will never move), and because a real one would break the
+  // pre-init _Block_object_assign.
+  info.ForceNoopCopy = D->needsInitOnHeap();
+
   auto pair = BlockByrefInfos.insert({D, info});
   assert(pair.second && "info was inserted recursively?");
   return pair.first->second;
 }
 
+void CodeGenFunction::emitByrefInitOnHeap(llvm::Value *P) {
+  // The object itself is initialized directly on the heap.  But for ABI
+  // backwards compatibility reasons, we have to set up a fake byref struct on
+  // the stack (with the structural components initialized but not the object
+  // itself), then call _Block_object_assign to move it to the heap (which is
+  // safe because we forced a no-op copy helper), then call
+  // _Block_object_dispose to release the extra ref from _Block_object_assign.
+  //
+  // 'P' points to the fake byref struct.
+
+  BlockFieldFlags flags = BLOCK_FIELD_IS_BYREF;
+  // Ignored out-parameter.  We'll use the forwarding pointer instead.
+  RawAddress out = CreateDefaultAlignTempAlloca(P->getType(), "initOnHeap.out");
+
+  llvm::Value *args[] = {Builder.CreateBitCast(out.getPointer(), VoidPtrTy),
+                         Builder.CreateBitCast(P, VoidPtrTy),
+                         llvm::ConstantInt::get(Int32Ty, flags.getBitMask())};
+  EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args);
+
+  BuildBlockRelease(P, flags, false);
+}
+
 /// Initialize the structural components of a __block variable, i.e.
 /// everything but the actual object.
 void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) {
@@ -2699,8 +2729,8 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) {
   storeHeaderField(V, getPointerSize(), "byref.isa");
 
   // Store the address of the variable into its own forwarding pointer.
-  storeHeaderField(addr.emitRawPointer(*this), getPointerSize(),
-                   "byref.forwarding");
+  llvm::Value *pointer = addr.emitRawPointer(*this);
+  storeHeaderField(pointer, getPointerSize(), "byref.forwarding");
 
   // Blocks ABI:
   //   c) the flags field is set to either 0 if no helper functions are
@@ -2764,6 +2794,9 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) {
     auto layoutInfo = CGM.getObjCRuntime().BuildByrefLayout(CGM, type);
     storeHeaderField(layoutInfo, getPointerSize(), "byref.layout");
   }
+
+  if (emission.NeedsInitOnHeap)
+    emitByrefInitOnHeap(pointer);
 }
 
 void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags,
diff --git a/clang/lib/CodeGen/CGBlocks.h b/clang/lib/CodeGen/CGBlocks.h
index 8d10c4f69b202..526dbbfd9b38d 100644
--- a/clang/lib/CodeGen/CGBlocks.h
+++ b/clang/lib/CodeGen/CGBlocks.h
@@ -139,6 +139,7 @@ class BlockByrefInfo {
   unsigned FieldIndex;
   CharUnits ByrefAlignment;
   CharUnits FieldOffset;
+  bool ForceNoopCopy;
 };
 
 /// Represents a type of copy/destroy operation that should be performed for an
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 4a213990d1e36..0bcc8de49365e 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1450,6 +1450,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
 
   bool isEscapingByRef = D.isEscapingByref();
   emission.IsEscapingByRef = isEscapingByRef;
+  emission.IsCapturedByOwnInit = D.isCapturedByOwnInit();
+  emission.NeedsInitOnHeap = D.needsInitOnHeap();
 
   CharUnits alignment = getContext().getDeclAlign(&D);
 
@@ -1692,68 +1694,6 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
   return emission;
 }
 
-static bool isCapturedBy(const VarDecl &, const Expr *);
-
-/// Determines whether the given __block variable is potentially
-/// captured by the given statement.
-static bool isCapturedBy(const VarDecl &Var, const Stmt *S) {
-  if (const Expr *E = dyn_cast<Expr>(S))
-    return isCapturedBy(Var, E);
-  for (const Stmt *SubStmt : S->children())
-    if (isCapturedBy(Var, SubStmt))
-      return true;
-  return false;
-}
-
-/// Determines whether the given __block variable is potentially
-/// captured by the given expression.
-static bool isCapturedBy(const VarDecl &Var, const Expr *E) {
-  // Skip the most common kinds of expressions that make
-  // hierarchy-walking expensive.
-  E = E->IgnoreParenCasts();
-
-  if (const BlockExpr *BE = dyn_cast<BlockExpr>(E)) {
-    const BlockDecl *Block = BE->getBlockDecl();
-    for (const auto &I : Block->captures()) {
-      if (I.getVariable() == &Var)
-        return true;
-    }
-
-    // No need to walk into the subexpressions.
-    return false;
-  }
-
-  if (const StmtExpr *SE = dyn_cast<StmtExpr>(E)) {
-    const CompoundStmt *CS = SE->getSubStmt();
-    for (const auto *BI : CS->body())
-      if (const auto *BIE = dyn_cast<Expr>(BI)) {
-        if (isCapturedBy(Var, BIE))
-          return true;
-      }
-      else if (const auto *DS = dyn_cast<DeclStmt>(BI)) {
-          // special case declarations
-          for (const auto *I : DS->decls()) {
-              if (const auto *VD = dyn_cast<VarDecl>((I))) {
-                const Expr *Init = VD->getInit();
-                if (Init && isCapturedBy(Var, Init))
-                  return true;
-              }
-          }
-      }
-      else
-        // FIXME. Make safe assumption assuming arbitrary statements cause capturing.
-        // Later, provide code to poke into statements for capture analysis.
-        return true;
-    return false;
-  }
-
-  for (const Stmt *SubStmt : E->children())
-    if (isCapturedBy(Var, SubStmt))
-      return true;
-
-  return false;
-}
-
 /// Determine whether the given initializer is trivial in the sense
 /// that it requires no code to be generated.
 bool CodeGenFunction::isTrivialInitializer(const Expr *Init) {
@@ -1918,8 +1858,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
   // Check whether this is a byref variable that's potentially
   // captured and moved by its own initializer.  If so, we'll need to
   // emit the initializer first, then copy into the variable.
-  bool capturedByInit =
-      Init && emission.IsEscapingByRef && isCapturedBy(D, Init);
+  bool capturedByInit = emission.IsCapturedByOwnInit;
 
   bool locIsByrefHeader = !capturedByInit;
   const Address Loc =
@@ -1951,8 +1890,9 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
   llvm::Constant *constant = nullptr;
   if (emission.IsConstantAggregate ||
       D.mightBeUsableInConstantExpressions(getContext())) {
-    assert(!capturedByInit && "constant init contains a capturing block?");
     constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D);
+    assert(!(constant && capturedByInit) &&
+           "constant init contains a capturing block?");
     if (constant && !constant->isZeroValue() &&
         (trivialAutoVarInit !=
          LangOptions::TrivialAutoVarInitKind::Uninitialized)) {
@@ -1975,6 +1915,10 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
     initializeWhatIsTechnicallyUninitialized(Loc);
     LValue lv = MakeAddrLValue(Loc, type);
     lv.setNonGC(true);
+    if (emission.NeedsInitOnHeap) {
+      drillIntoBlockVariable(*this, lv, &D);
+      capturedByInit = false; // We don't need to drill again.
+    }
     return EmitExprAsInit(Init, &D, lv, capturedByInit);
   }
 
@@ -2023,6 +1967,9 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
     return;
   }
   case TEK_Aggregate:
+    assert(
+        !capturedByInit &&
+        "Capture-by-initializer should have been handled by NeedsInitOnHeap!");
     if (type->isAtomicType()) {
       EmitAtomicInit(const_cast<Expr*>(init), lvalue);
     } else {
@@ -2031,7 +1978,6 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
         Overlap = AggValueSlot::DoesNotOverlap;
       else if (auto *FD = dyn_cast<FieldDecl>(D))
         Overlap = getOverlapForFieldInit(FD);
-      // TODO: how can we delay here if D is captured by its initializer?
       EmitAggExpr(init,
                   AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed,
                                           AggValueSlot::DoesNotNeedGCBarriers,
@@ -2120,27 +2066,33 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
   // us from jumping into any of these scopes anyway.
   if (!HaveInsertPoint()) return;
 
-  const VarDecl &D = *emission.Variable;
+  // If we're initializing directly on the heap, _Block_object_destroy will
+  // handle destruction, so we don't need to perform cleanups directly.
+  if (!emission.NeedsInitOnHeap) {
+    const VarDecl &D = *emission.Variable;
 
-  // Check the type for a cleanup.
-  if (QualType::DestructionKind dtorKind = D.needsDestruction(getContext()))
-    emitAutoVarTypeCleanup(emission, dtorKind);
+    // Check the type for a cleanup.
+    if (QualType::DestructionKind dtorKind = D.needsDestruction(getContext()))
+      emitAutoVarTypeCleanup(emission, dtorKind);
 
-  // In GC mode, honor objc_precise_lifetime.
-  if (getLangOpts().getGC() != LangOptions::NonGC &&
-      D.hasAttr<ObjCPreciseLifetimeAttr>()) {
-    EHStack.pushCleanup<ExtendGCLifetime>(NormalCleanup, &D);
-  }
+    // In GC mode, honor objc_precise_lifetime.
+    if (getLangOpts().getGC() != LangOptions::NonGC &&
+        D.hasAttr<ObjCPreciseLifetimeAttr>()) {
+      EHStack.pushCleanup<ExtendGCLifetime>(NormalCleanup, &D);
+    }
 
-  // Handle the cleanup attribute.
-  if (const CleanupAttr *CA = D.getAttr<CleanupAttr>()) {
-    const FunctionDecl *FD = CA->getFunctionDecl();
+    // Handle the cleanup attribute.
+    if (const CleanupAttr *CA = D.getAttr<CleanupAttr>()) {
+      const FunctionDecl *FD = CA->getFunctionDecl();
 
-    llvm::Constant *F = CGM.GetAddrOfFunction(FD);
-    assert(F && "Could not find function!");
+      llvm::Constant *F = CGM.GetAddrOfFunction(FD);
+      assert(F && "Could not find function!");
 
-    const CGFunctionInfo &Info = CGM.getTypes().arrangeFunctionDeclaration(FD);
-    EHStack.pushCleanup<CallCleanupFunction>(NormalAndEHCleanup, F, &Info, &D);
+      const CGFunctionInfo &Info =
+          CGM.getTypes().arrangeFunctionDeclaration(FD);
+      EHStack.pushCleanup<CallCleanupFunction>(NormalAndEHCleanup, F, &Info,
+                                               &D);
+    }
   }
 
   // If this is a block variable, call _Block_object_destroy
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 5f3ee7eb943f9..17dcd37f91271 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2313,6 +2313,7 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   class AutoVarEmission;
 
+  void emitByrefInitOnHeap(llvm::Value *P);
   void emitByrefStructureInit(const AutoVarEmission &emission);
 
   /// Enter a cleanup to destroy a __block variable.  Note that this
@@ -3356,6 +3357,14 @@ class CodeGenFunction : public CodeGenTypeCache {
     /// escaping block.
     bool IsEscapingByRef;
 
+    /// True if the variable is a __block variable that is captured by a block
+    // referenced in its own initializer.
+    bool IsCapturedByOwnInit;
+
+    /// True if the variable is a __block variable that needs to be initialized
+    /// directly on the heap.
+    bool NeedsInitOnHeap;
+
     /// True if the variable is of aggregate type and has a constant
     /// initializer.
     bool IsConstantAggregate;
@@ -3374,7 +3383,8 @@ class CodeGenFunction : public CodeGenTypeCache {
 
     AutoVarEmission(const VarDecl &variable)
         : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr),
-          IsEscapingByRef(false), IsConstantAggregate(false),
+          IsEscapingByRef(false), IsCapturedByOwnInit(false),
+          NeedsInitOnHeap(false), IsConstantAggregate(false),
           SizeForLifetimeMarkers(nullptr), AllocaAddr(RawAddress::invalid()) {}
 
     bool wasEmittedAsGlobal() const { return !Addr.isValid(); }
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 0f68418130ead..7eec0f69d0ad1 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -84,6 +84,7 @@ class InitSegAttr;
 
 namespace CodeGen {
 
+class BlockByrefInfo;
 class CodeGenFunction;
 class CodeGenTBAA;
 class CGCXXABI;
@@ -257,13 +258,16 @@ class BlockByrefHelpers : public llvm::FoldingSetNode {
   /// have different helper functions.
   CharUnits Alignment;
 
-  BlockByrefHelpers(CharUnits alignment)
-      : CopyHelper(nullptr), DisposeHelper(nullptr), Alignment(alignment) {}
+  /// Emit a trivial copy helper; true if the variable is NeedsInitOnHeap.
+  bool ForceNoopCopy;
+
+  BlockByrefHelpers(const BlockByrefInfo &byrefInfo);
   BlockByrefHelpers(const BlockByrefHelpers &) = default;
   virtual ~BlockByrefHelpers();
 
   void Profile(llvm::FoldingSetNodeID &id) const {
     id.AddInteger(Alignment.getQuantity());
+    id.AddInteger(ForceNoopCopy);
     profileImpl(id);
   }
   virtual void profileImpl(llvm::FoldingSetNodeID &id) const = 0;
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 2c5774da3f666..15fcc287d68da 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -2177,6 +2177,13 @@ static void checkEscapingByref(VarDecl *VD, Sema &S) {
   SourceLocation Loc = VD->getLocation();
   Expr *VarRef =
       new (S.Context) DeclRefExpr(S.Context, VD, false, T, VK_LValue, Loc);
+  // Note: If needsInitOnHeap is true, the move/copy constructor will not
+  // actually be called, so in theory we don't have to require that it exists.
+  // But needsInitOnHeap is based on a conservative estimate of __block
+  // variables that might be retained in their own initializers; it could be
+  // refined in the future, which could cause variables to suddenly start
+  // requiring move constructors again.  To avoid the backwards compatibility
+  // hazard, require a move/copy constructor regardless of needsInitOnHeap.
   ExprResult Result;
   auto IE = InitializedEntity::InitializeBlock(Loc, T);
   if (S.getLangOpts().CPlusPlus23) {
@@ -2204,6 +2211,62 @@ static void checkEscapingByref(VarDecl *VD, Sema &S) {
     }
 }
 
+namespace {
+// Find blocks within a __block variable's initializer that capture that __block
+// variable.
+class CapturedByOwnInitVisitor
+    : public EvaluatedExprVisitor<CapturedByOwnInitVisitor> {
+  typedef EvaluatedExprVisitor<CapturedByOwnInitVisitor> Inherited;
+  VarDecl *const VD;
+
+public:
+  const BlockExpr *FoundBE;
+  CapturedByOwnInitVisitor(Sema &S, VarDecl *VD)
+      : Inherited(S.getASTContext()), VD(VD), FoundBE(nullptr) {}
+
+  void VisitBlockExpr(const BlockExpr *BE) {
+    if (FoundBE)
+      return; // No more work to do if already found.
+    for (const BlockDecl::Capture &BC : BE->getBlockDecl()->captures()) {
+      if (BC.getVariable() == VD) {
+        FoundBE = BE;
+        return;
+      }
+    }
+  }
+};
+} // namespace
+
+static void checkCapturedByOwnInit(VarDecl *VD, Sema &S) {
+  Expr *I = VD->getInit();
+  if (!I)
+    return;
+
+  // Check whether the variable is captured by a block literal within its own
+  // initializer.
+  CapturedByOwnInitVisitor V(S, VD);
+  V.Visit(I);
+  if (!V.FoundBE)
+    return;
+
+  if (VD->hasConstantInitialization()) {
+    // Block literals with captures can't be constant-evaluated.  But we can
+    // still reach here if the initializer syntactically contains a block
+    // literal that's never actually evaluated.  And if it's never evaluated,
+    // then we don't need to care about the capture.
+    return;
+  }
+
+  // Mark as self-capturing.
+  VD->setCapturedByOwnInit();
+  // If this is a record type, then it will need an implicit heap
+  // allocation, so warn.
+  if (VD->needsInitOnHeap()) {
+    S.Diag(VD->getLocation(), diag::warn_implicit_block_var_alloc) << VD;
+    S.Diag(V.FoundBE->getCaretLocation(), diag::note_because_captured_by_block);
+  }
+}
+
 static void markEscapingByrefs(const FunctionScopeInfo &FSI, Sema &S) {
   // Set the EscapingByref flag of __block variables captured by
   // escaping blocks.
@@ -2233,6 +2296,9 @@ static void markEscapingByrefs(const FunctionScopeInfo &FSI, Sema &S) {
     // __block variables might require us to capture a copy-initializer.
     if (!VD->isEscapingByref())
       continue;
+
+    checkCapturedByOwnInit(VD, S);
+
     // It's currently invalid to ever have a __block variable with an
     // array type; should we diagnose that here?
     // Regardless, we don't want to ignore array nesting when
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index a6254b70560c3..d1ea9d4dbde48 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1629,6 +1629,7 @@ ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) {
         VarDeclBits.getNextBit();
 
     VD->NonParmVarDeclBits.EscapingByref = VarDeclBits.getNextBit();
+    VD->NonParmVarDeclBits.CapturedByOwnInit = VarDeclBits.getNextBit();
     HasDeducedType = VarDeclBits.getNextBit();
     VD->NonParmVarDeclBits.ImplicitParamKind =
         VarDeclBits.getNextBits(/*Width*/ 3);
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index c2f1d1b44241c..dc519fb9852ff 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -1153,6 +1153,7 @@ void ASTDeclWriter::VisitVarDecl(VarDecl *D) {
     VarDeclBits.addBit(D->isPreviousDeclInSameBlockScope());
 
     VarDeclBits.addBit(D->isEscapingByref());
+    VarDeclBits.addBit(D->isCapturedByOwnInit());
     HasDeducedType = D->getType()->getContainedDeducedType();
     VarDeclBits.addBit(HasDeducedType);
 
@@ -2513,13 +2514,14 @@ void ASTWriter::WriteDeclAbbrevs() {
   // VarDecl
   Abv->Add(BitCodeAbbrevOp(
       BitCodeAbbrevOp::Fixed,
-      21)); // Packed Var Decl bits:  Linkage, ModulesCodegen,
+      22)); // Packed Var Decl bits:  Linkage, ModulesCodegen,
             // SClass, TSCSpec, InitStyle,
             // isARCPseudoStrong, IsThisDeclarationADemotedDefinition,
             // isExceptionVariable, isNRVOVariable, isCXXForRangeDecl,
             // isInline, isInlineSpecified, isConstexpr,
             // isInitCapture, isPrevDeclInSameScope,
-            // EscapingByref, HasDeducedType, ImplicitParamKind, isObjCForDecl
+            // EscapingByref, CapturedByOwnInit, HasDeducedType,
+            // ImplicitParamKind, isObjCForDecl
   Abv->Add(BitCodeAbbrevOp(0));                         // VarKind (local enum)
   // Type Source Info
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
diff --git a/clang/test/CodeGenCXX/block-capture.cpp b/clang/test/CodeGenCXX/block-capture.cpp
index c235d97bf5fbc..3d55a235ae559 100644
--- a/clang/test/CodeGenCXX/block-capture.cpp
+++ b/clang/test/CodeGenCXX/block-capture.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -x c++ -std=c++11 -fblocks -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple arm64e-apple-ios -x c++ -std=c++20 -fblocks -Wimplicit-block-var-alloc -verify -emit-llvm %s -o - | \
+// RUN:    FileCheck --implicit-check-not "call{{.*}}_ZN3Big" %s
 
 // CHECK: %struct.__block_byref_baz = type { ptr, ptr, i32, i32, i32 }
 // CHECK: [[baz:%[0-9a-z_]*]] = alloca %struct.__block_byref_baz
@@ -6,8 +7,128 @@
 // CHECK: store ptr [[baz]], ptr [[bazref]]
 // CHECK: call void @_Block_object_dispose(ptr [[baz]]
 
-int main() {
+int a() {
   __block int baz = [&]() { return 0; }();
-  ^{ (void)baz; };
+  (void)^{
+    (void)baz;
+  };
   return 0;
 }
+
+class Big {
+public:
+  Big(const Big &);
+  ~Big();
+
+private:
+  Big();
+  int s[100];
+};
+Big getBig(Big * (^)());
+
+// CHECK: define void @_Z11heapInitBigv
+// CHECK: call void @_Block_object_assign(ptr {{.*}}, ptr {{.*}}, i32 8)
+// CHECK: call void @_Block_object_dispose(ptr {{.*}}, i32 8)
+// CHECK: %B1 = getelementptr inbounds %struct.__block_byref_B, ptr %{{.*}}, i32 0, i32 6
+// CHECK: call void @_Z6getBigU13block_pointerFP3BigvE({{[^,]*}} %B1,
+// CHECK: call void @_Block_object_dispose
+// (no call to destructor, enforced by --implicit-check-not)
+
+// CHECK: define internal void @__Block_byref_object_copy_
+// (no call to copy constructor, enforced by --implicit-check-not)
+
+// CHECK: define internal void @__Block_byref_object_dispose_
+// CHECK: call {{.*}} @_ZN3BigD1Ev(
+
+void heapInitBig() {
+  // expected-warning at +1{{variable 'B' will be initialized in a heap allocation}}
+  __block Big B = ({ // Make sure this works with statement expressions.
+    getBig(
+        // expected-note at +1{{because it is captured by a block used in its own initializer}}
+        ^{
+          return &B;
+        });
+  });
+}
+
+struct Small {
+  int x[2];
+};
+extern Small getSmall(const void * (^)());
+extern Small getSmall(const void * (^)(), const void * (^)());
+extern Small getSmall(__SIZE_TYPE__);
+extern int getInt(const void *(^)());
+
+// CHECK: %S11 = getelementptr inbounds %struct.__block_byref_S1, ptr %{{.*}}, i32 0, i32 4
+// CHECK: [[call:%[0-9a-z_\.]*]] = call i64 @_Z8getSmallU13block_pointerFPKvvE(
+// CHECK: [[dive:%[0-9a-z_\.]*]] = getelementptr inbounds %struct.Small, ptr %S11, i32 0, i32 0
+// CHECK: store i64 [[call]], ptr [[dive]], align 8
+
+void heapInitSmallAtomic() {
+  // expected-warning at +1{{variable 'S1' will be initialized in a heap allocation}}
+  __block _Atomic(Small) S1 = getSmall(
+      // expected-note at +1{{because it is captured by a block used in its own initializer}}
+      ^const void * {
+        return &S1;
+      });
+}
+
+// With multiple blocks capturing the same variable, we only note the first one.
+// In theory it would be more helpful to note each block, but that would mess up
+// the grammar of the diagnostic.
+void heapInitTwoSmall() {
+  // expected-warning at +1{{variable 'S2' will be initialized in a heap allocation}}
+  __block Small S2 = getSmall(
+      // expected-note at +1{{because it is captured by a block used in its own initializer}}
+      ^const void * {
+        return &S2;
+      },
+      ^const void * {
+        return &S2;
+      });
+}
+
+// This used to cause an ICE because the type of the variable makes it eligible
+// for constant initialization (but the actual initializer doesn't).
+//
+// It's also not actually a self-reference, so we should not get the warning.
+// The block is not capturing itself; it's only capturing a pointer to itself
+// (thus, e.g., calling Block_copy would not make it safe to use after the
+// function returns).  You might expect C++ lifetime extension to affect this
+// but it doesn't.
+void constRef() {
+  __block const Small &S = getSmall(^const void * {
+    return &S;
+  });
+}
+
+// This is also not actually a self-reference because the block literal
+// is in an unevaluated expression.  We should not get the warning.
+void unevaluated() {
+  __block Small S = getSmall(sizeof(^const void * {
+    return &S;
+  }));
+}
+
+// These are not self-references because the initializers are
+// const-evaluated and the block literal happens to never get executed
+// (despite not being in an "unevaluated expression" type-system-wise).
+void constInits() {
+  __block constexpr const int I = true ? 1000 : getInt(^const void *{
+    return &I;
+  });
+  __block constexpr Small S = true ? Small{2000, 3000} : getSmall(^const void *{
+    return &S;
+  });
+  __block const Small &S2 = true ? Small{4000, 5000} : getSmall(^const void *{
+    return &S2;
+  });
+}
+
+// This is definitely not a self-reference.
+void irrelevantVariable() {
+  __block Small Irrelevant;
+  __block Small NotCaptured = getSmall(^const void * {
+    return &Irrelevant;
+  });
+}

>From 1a88c2d7a91443c6dac51b55776b7ce0ab8f47db Mon Sep 17 00:00:00 2001
From: ille-apple <ille at apple.com>
Date: Fri, 31 May 2024 16:00:21 -0700
Subject: [PATCH 2/2] Updates:

- Revamp heap-init dispose helper to use a conditional flag.

- Fix __attribute__((cleanup)).

- Revert the changes from the first commit in this PR that involve
  copying ForceNoopCopy to BlockByrefHelpers.  Upon review, this turned
  out to be unnecessary.
---
 clang/lib/CodeGen/CGBlocks.cpp          | 141 ++++++++++++++++--------
 clang/lib/CodeGen/CGBlocks.h            |   6 +-
 clang/lib/CodeGen/CGDecl.cpp            |  62 ++++++-----
 clang/lib/CodeGen/CodeGenFunction.h     |   3 +-
 clang/lib/CodeGen/CodeGenModule.h       |   8 +-
 clang/test/CodeGenCXX/block-capture.cpp |  81 ++++++++++++--
 6 files changed, 203 insertions(+), 98 deletions(-)

diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index 2b381a5301180..f0b21b46938df 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -45,13 +45,6 @@ CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
     name = name.substr(1);
 }
 
-BlockByrefHelpers::BlockByrefHelpers(const BlockByrefInfo &info)
-    : CopyHelper(nullptr), DisposeHelper(nullptr),
-      // The alignment we care about for the purposes of uniquing byref
-      // helpers is the alignment of the actual byref value field.
-      Alignment(info.ByrefAlignment.alignmentAtOffset(info.FieldOffset)),
-      ForceNoopCopy(info.ForceNoopCopy) {}
-
 // Anchor the vtable to this translation unit.
 BlockByrefHelpers::~BlockByrefHelpers() {}
 
@@ -2134,8 +2127,8 @@ class ObjectByrefHelpers final : public BlockByrefHelpers {
   BlockFieldFlags Flags;
 
 public:
-  ObjectByrefHelpers(const BlockByrefInfo &info, BlockFieldFlags flags)
-      : BlockByrefHelpers(info), Flags(flags) {}
+  ObjectByrefHelpers(CharUnits alignment, BlockFieldFlags flags)
+    : BlockByrefHelpers(alignment), Flags(flags) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2168,7 +2161,7 @@ class ObjectByrefHelpers final : public BlockByrefHelpers {
 /// Emits the copy/dispose helpers for an ARC __block __weak variable.
 class ARCWeakByrefHelpers final : public BlockByrefHelpers {
 public:
-  ARCWeakByrefHelpers(const BlockByrefInfo &info) : BlockByrefHelpers(info) {}
+  ARCWeakByrefHelpers(CharUnits alignment) : BlockByrefHelpers(alignment) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2189,7 +2182,7 @@ class ARCWeakByrefHelpers final : public BlockByrefHelpers {
 /// that's not of block-pointer type.
 class ARCStrongByrefHelpers final : public BlockByrefHelpers {
 public:
-  ARCStrongByrefHelpers(const BlockByrefInfo &info) : BlockByrefHelpers(info) {}
+  ARCStrongByrefHelpers(CharUnits alignment) : BlockByrefHelpers(alignment) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2225,8 +2218,8 @@ class ARCStrongByrefHelpers final : public BlockByrefHelpers {
 /// variable that's of block-pointer type.
 class ARCStrongBlockByrefHelpers final : public BlockByrefHelpers {
 public:
-  ARCStrongBlockByrefHelpers(const BlockByrefInfo &info)
-      : BlockByrefHelpers(info) {}
+  ARCStrongBlockByrefHelpers(CharUnits alignment)
+    : BlockByrefHelpers(alignment) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2255,9 +2248,9 @@ class CXXByrefHelpers final : public BlockByrefHelpers {
   const Expr *CopyExpr;
 
 public:
-  CXXByrefHelpers(const BlockByrefInfo &info, QualType type,
+  CXXByrefHelpers(CharUnits alignment, QualType type,
                   const Expr *copyExpr)
-      : BlockByrefHelpers(info), VarType(type), CopyExpr(copyExpr) {}
+    : BlockByrefHelpers(alignment), VarType(type), CopyExpr(copyExpr) {}
 
   bool needsCopy() const override { return CopyExpr != nullptr; }
   void emitCopy(CodeGenFunction &CGF, Address destField,
@@ -2283,8 +2276,8 @@ class NonTrivialCStructByrefHelpers final : public BlockByrefHelpers {
   QualType VarType;
 
 public:
-  NonTrivialCStructByrefHelpers(const BlockByrefInfo &info, QualType type)
-      : BlockByrefHelpers(info), VarType(type) {}
+  NonTrivialCStructByrefHelpers(CharUnits alignment, QualType type)
+    : BlockByrefHelpers(alignment), VarType(type) {}
 
   void emitCopy(CodeGenFunction &CGF, Address destField,
                 Address srcField) override {
@@ -2343,7 +2336,7 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
     // Create a scope with an artificial location for the body of this function.
   auto AL = ApplyDebugLocation::CreateArtificial(CGF);
 
-  if (generator.needsCopy() && !generator.ForceNoopCopy) {
+  if (generator.needsCopy() && !byrefInfo.ForInitOnHeap) {
     // dst->x
     Address destField = CGF.GetAddrOfLocalVar(&Dst);
     destField = Address(CGF.Builder.CreateLoad(destField), byrefInfo.Type,
@@ -2412,9 +2405,30 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
     Address addr = CGF.GetAddrOfLocalVar(&Src);
     addr = Address(CGF.Builder.CreateLoad(addr), byrefInfo.Type,
                    byrefInfo.ByrefAlignment);
-    addr = CGF.emitBlockByrefAddress(addr, byrefInfo, false, "object");
 
+    llvm::BasicBlock *skipBB = nullptr;
+    if (byrefInfo.ForInitOnHeap) {
+      // For objects initialized on the heap, a separate field tracks whether
+      // the object has completed initialization.  If it's still false here,
+      // then initialization threw an exception, so we don't want to run the
+      // destructor.
+      skipBB = CGF.createBasicBlock("skip");
+      llvm::BasicBlock *runBB = CGF.createBasicBlock("run");
+
+      Address initializedAddr = CGF.Builder.CreateStructGEP(
+          addr, byrefInfo.IndexOfInitializedFlag, "byref.initialized");
+      llvm::Value *initialized =
+          CGF.Builder.CreateFlagLoad(initializedAddr.emitRawPointer(CGF));
+
+      CGF.Builder.CreateCondBr(initialized, runBB, skipBB);
+      CGF.EmitBlock(runBB);
+    }
+
+    addr = CGF.emitBlockByrefAddress(addr, byrefInfo, false, "object");
     generator.emitDispose(CGF, addr);
+
+    if (skipBB)
+      CGF.EmitBlock(skipBB);
   }
 
   CGF.FinishFunction();
@@ -2465,21 +2479,26 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
 
   auto &byrefInfo = getBlockByrefInfo(&var);
 
+  // The alignment we care about for the purposes of uniquing byref
+  // helpers is the alignment of the actual byref value field.
+  CharUnits valueAlignment =
+    byrefInfo.ByrefAlignment.alignmentAtOffset(byrefInfo.FieldOffset);
+
   if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
     const Expr *copyExpr =
         CGM.getContext().getBlockVarCopyInit(&var).getCopyExpr();
     if (!copyExpr && record->hasTrivialDestructor()) return nullptr;
 
-    return ::buildByrefHelpers(CGM, byrefInfo,
-                               CXXByrefHelpers(byrefInfo, type, copyExpr));
+    return ::buildByrefHelpers(
+        CGM, byrefInfo, CXXByrefHelpers(valueAlignment, type, copyExpr));
   }
 
   // If type is a non-trivial C struct type that is non-trivial to
   // destructly move or destroy, build the copy and dispose helpers.
   if (type.isNonTrivialToPrimitiveDestructiveMove() == QualType::PCK_Struct ||
       type.isDestructedType() == QualType::DK_nontrivial_c_struct)
-    return ::buildByrefHelpers(CGM, byrefInfo,
-                               NonTrivialCStructByrefHelpers(byrefInfo, type));
+    return ::buildByrefHelpers(
+        CGM, byrefInfo, NonTrivialCStructByrefHelpers(valueAlignment, type));
 
   // Otherwise, if we don't have a retainable type, there's nothing to do.
   // that the runtime does extra copies.
@@ -2501,21 +2520,20 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
     // byref routines.
     case Qualifiers::OCL_Weak:
       return ::buildByrefHelpers(CGM, byrefInfo,
-                                 ARCWeakByrefHelpers(byrefInfo));
+                                 ARCWeakByrefHelpers(valueAlignment));
 
     // ARC __strong __block variables need to be retained.
     case Qualifiers::OCL_Strong:
-      // Block pointers need to be copied, and there's no direct
-      // transfer possible.
       if (type->isBlockPointerType()) {
+        // Block pointers need to be copied, and there's no direct
+        // transfer possible.
         return ::buildByrefHelpers(CGM, byrefInfo,
-                                   ARCStrongBlockByrefHelpers(byrefInfo));
-
+                                   ARCStrongBlockByrefHelpers(valueAlignment));
+      } else {
         // Otherwise, we transfer ownership of the retain from the stack
         // to the heap.
-      } else {
         return ::buildByrefHelpers(CGM, byrefInfo,
-                                   ARCStrongByrefHelpers(byrefInfo));
+                                   ARCStrongByrefHelpers(valueAlignment));
       }
     }
     llvm_unreachable("fell out of lifetime switch!");
@@ -2535,7 +2553,7 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
     flags |= BLOCK_FIELD_IS_WEAK;
 
   return ::buildByrefHelpers(CGM, byrefInfo,
-                             ObjectByrefHelpers(byrefInfo, flags));
+                             ObjectByrefHelpers(valueAlignment, flags));
 }
 
 Address CodeGenFunction::emitBlockByrefAddress(Address baseAddr,
@@ -2625,6 +2643,14 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
     size += CharUnits::fromQuantity(PointerSizeInBytes);
   }
 
+  unsigned indexOfInitializedFlag = UINT_MAX;
+  if (D->needsInitOnHeap()) {
+    /// uint8_t initialized;
+    types.push_back(Int8Ty);
+    size += CharUnits::fromQuantity(1);
+    indexOfInitializedFlag = types.size() - 1;
+  }
+
   // T x;
   llvm::Type *varTy = ConvertTypeForMem(Ty);
 
@@ -2654,38 +2680,54 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) {
   info.FieldIndex = types.size() - 1;
   info.FieldOffset = varOffset;
   info.ByrefAlignment = std::max(varAlign, getPointerAlign());
-
-  // If we're initializing directly on the heap, then we should emit a
-  // no-op copy helper, both because we don't need a real one (the
-  // object will never move), and because a real one would break the
-  // pre-init _Block_object_assign.
-  info.ForceNoopCopy = D->needsInitOnHeap();
+  info.ForInitOnHeap = D->needsInitOnHeap();
+  info.IndexOfInitializedFlag = indexOfInitializedFlag;
 
   auto pair = BlockByrefInfos.insert({D, info});
   assert(pair.second && "info was inserted recursively?");
   return pair.first->second;
 }
 
-void CodeGenFunction::emitByrefInitOnHeap(llvm::Value *P) {
+/// Allocate a byref on the heap (but don't initialize it yet).
+void CodeGenFunction::emitByrefHeapAlloc(llvm::Value *stackByref,
+                                         QualType declType) {
   // The object itself is initialized directly on the heap.  But for ABI
   // backwards compatibility reasons, we have to set up a fake byref struct on
   // the stack (with the structural components initialized but not the object
   // itself), then call _Block_object_assign to move it to the heap (which is
   // safe because we forced a no-op copy helper), then call
   // _Block_object_dispose to release the extra ref from _Block_object_assign.
-  //
-  // 'P' points to the fake byref struct.
-
   BlockFieldFlags flags = BLOCK_FIELD_IS_BYREF;
-  // Ignored out-parameter.  We'll use the forwarding pointer instead.
-  RawAddress out = CreateDefaultAlignTempAlloca(P->getType(), "initOnHeap.out");
+  RawAddress heapByref =
+      CreateDefaultAlignTempAlloca(stackByref->getType(), "initOnHeap.ptr");
 
-  llvm::Value *args[] = {Builder.CreateBitCast(out.getPointer(), VoidPtrTy),
-                         Builder.CreateBitCast(P, VoidPtrTy),
+  llvm::Value *args[] = {heapByref.getPointer(), stackByref,
                          llvm::ConstantInt::get(Int32Ty, flags.getBitMask())};
   EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args);
 
-  BuildBlockRelease(P, flags, false);
+  BuildBlockRelease(stackByref, flags, false);
+
+  // We need to add a cleanup as soon as the allocation happens, to ensure the
+  // allocation is freed if initialization throws.
+  if (CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
+    enterByrefCleanup(NormalAndEHCleanup, heapByref, BLOCK_FIELD_IS_BYREF,
+                      /*LoadBlockVarAddr*/ true,
+                      cxxDestructorCanThrow(declType));
+  }
+}
+
+/// Set the initialized flag of a heap-initialized byref.
+void CodeGenFunction::emitByrefMarkInitialized(
+    const AutoVarEmission &emission) {
+  auto &info = getBlockByrefInfo(emission.Variable);
+  Address forwardingAddr =
+      Builder.CreateStructGEP(emission.Addr, 1, "forwarding");
+  Address heapByref(Builder.CreateLoad(forwardingAddr), info.Type,
+                    info.ByrefAlignment);
+
+  Address initializedAddr = Builder.CreateStructGEP(
+      heapByref, info.IndexOfInitializedFlag, "byref.initialized");
+  Builder.CreateFlagStore(true, initializedAddr.emitRawPointer(*this));
 }
 
 /// Initialize the structural components of a __block variable, i.e.
@@ -2795,8 +2837,11 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) {
     storeHeaderField(layoutInfo, getPointerSize(), "byref.layout");
   }
 
-  if (emission.NeedsInitOnHeap)
-    emitByrefInitOnHeap(pointer);
+  if (emission.NeedsInitOnHeap) {
+    V = llvm::ConstantInt::get(Int8Ty, 0);
+    storeHeaderField(V, CharUnits::fromQuantity(1), "byref.initialized");
+    emitByrefHeapAlloc(pointer, type);
+  }
 }
 
 void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags,
diff --git a/clang/lib/CodeGen/CGBlocks.h b/clang/lib/CodeGen/CGBlocks.h
index 526dbbfd9b38d..9b0d2087da31c 100644
--- a/clang/lib/CodeGen/CGBlocks.h
+++ b/clang/lib/CodeGen/CGBlocks.h
@@ -136,10 +136,14 @@ inline BlockFieldFlags operator|(BlockFieldFlag_t l, BlockFieldFlag_t r) {
 class BlockByrefInfo {
 public:
   llvm::StructType *Type;
+  /// Index of the field for the variable itself.
   unsigned FieldIndex;
   CharUnits ByrefAlignment;
   CharUnits FieldOffset;
-  bool ForceNoopCopy;
+  /// Whether this will be initialized directly on the heap.
+  bool ForInitOnHeap;
+  /// If ForInitOnHeap is true, index of the 'initialized' field.
+  unsigned IndexOfInitializedFlag;
 };
 
 /// Represents a type of copy/destroy operation that should be performed for an
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 0bcc8de49365e..f833de95fe3f2 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -2066,46 +2066,48 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
   // us from jumping into any of these scopes anyway.
   if (!HaveInsertPoint()) return;
 
-  // If we're initializing directly on the heap, _Block_object_destroy will
-  // handle destruction, so we don't need to perform cleanups directly.
-  if (!emission.NeedsInitOnHeap) {
-    const VarDecl &D = *emission.Variable;
+  const VarDecl &D = *emission.Variable;
 
+  if (emission.NeedsInitOnHeap) {
+    // If this is a __block variable that we initialized directly on the heap,
+    // then the stack object is uninitialized and doesn't need destruction.
+    // But we do need to tell the destroy helper that the real byref is
+    // successfully initialized.
+    emitByrefMarkInitialized(emission);
+  } else {
     // Check the type for a cleanup.
     if (QualType::DestructionKind dtorKind = D.needsDestruction(getContext()))
       emitAutoVarTypeCleanup(emission, dtorKind);
 
-    // In GC mode, honor objc_precise_lifetime.
-    if (getLangOpts().getGC() != LangOptions::NonGC &&
-        D.hasAttr<ObjCPreciseLifetimeAttr>()) {
-      EHStack.pushCleanup<ExtendGCLifetime>(NormalCleanup, &D);
+    // If this is a block variable, call _Block_object_destroy (on the
+    // unforwarded address). Don't enter this cleanup if we're in pure-GC
+    // mode.
+    if (emission.IsEscapingByRef &&
+        CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
+      BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
+      if (D.getType().isObjCGCWeak())
+        Flags |= BLOCK_FIELD_IS_WEAK;
+      enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags,
+                        /*LoadBlockVarAddr*/ false,
+                        cxxDestructorCanThrow(D.getType()));
     }
+  }
 
-    // Handle the cleanup attribute.
-    if (const CleanupAttr *CA = D.getAttr<CleanupAttr>()) {
-      const FunctionDecl *FD = CA->getFunctionDecl();
+  // In GC mode, honor objc_precise_lifetime.
+  if (getLangOpts().getGC() != LangOptions::NonGC &&
+      D.hasAttr<ObjCPreciseLifetimeAttr>()) {
+    EHStack.pushCleanup<ExtendGCLifetime>(NormalCleanup, &D);
+  }
 
-      llvm::Constant *F = CGM.GetAddrOfFunction(FD);
-      assert(F && "Could not find function!");
+  // Handle the cleanup attribute.
+  if (const CleanupAttr *CA = D.getAttr<CleanupAttr>()) {
+    const FunctionDecl *FD = CA->getFunctionDecl();
 
-      const CGFunctionInfo &Info =
-          CGM.getTypes().arrangeFunctionDeclaration(FD);
-      EHStack.pushCleanup<CallCleanupFunction>(NormalAndEHCleanup, F, &Info,
-                                               &D);
-    }
-  }
+    llvm::Constant *F = CGM.GetAddrOfFunction(FD);
+    assert(F && "Could not find function!");
 
-  // If this is a block variable, call _Block_object_destroy
-  // (on the unforwarded address). Don't enter this cleanup if we're in pure-GC
-  // mode.
-  if (emission.IsEscapingByRef &&
-      CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
-    BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
-    if (emission.Variable->getType().isObjCGCWeak())
-      Flags |= BLOCK_FIELD_IS_WEAK;
-    enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags,
-                      /*LoadBlockVarAddr*/ false,
-                      cxxDestructorCanThrow(emission.Variable->getType()));
+    const CGFunctionInfo &Info = CGM.getTypes().arrangeFunctionDeclaration(FD);
+    EHStack.pushCleanup<CallCleanupFunction>(NormalAndEHCleanup, F, &Info, &D);
   }
 }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 17dcd37f91271..e7ed5281057d5 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2313,7 +2313,8 @@ class CodeGenFunction : public CodeGenTypeCache {
 
   class AutoVarEmission;
 
-  void emitByrefInitOnHeap(llvm::Value *P);
+  void emitByrefHeapAlloc(llvm::Value *fakeByref, QualType declType);
+  void emitByrefMarkInitialized(const AutoVarEmission &emission);
   void emitByrefStructureInit(const AutoVarEmission &emission);
 
   /// Enter a cleanup to destroy a __block variable.  Note that this
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 7eec0f69d0ad1..0f68418130ead 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -84,7 +84,6 @@ class InitSegAttr;
 
 namespace CodeGen {
 
-class BlockByrefInfo;
 class CodeGenFunction;
 class CodeGenTBAA;
 class CGCXXABI;
@@ -258,16 +257,13 @@ class BlockByrefHelpers : public llvm::FoldingSetNode {
   /// have different helper functions.
   CharUnits Alignment;
 
-  /// Emit a trivial copy helper; true if the variable is NeedsInitOnHeap.
-  bool ForceNoopCopy;
-
-  BlockByrefHelpers(const BlockByrefInfo &byrefInfo);
+  BlockByrefHelpers(CharUnits alignment)
+      : CopyHelper(nullptr), DisposeHelper(nullptr), Alignment(alignment) {}
   BlockByrefHelpers(const BlockByrefHelpers &) = default;
   virtual ~BlockByrefHelpers();
 
   void Profile(llvm::FoldingSetNodeID &id) const {
     id.AddInteger(Alignment.getQuantity());
-    id.AddInteger(ForceNoopCopy);
     profileImpl(id);
   }
   virtual void profileImpl(llvm::FoldingSetNodeID &id) const = 0;
diff --git a/clang/test/CodeGenCXX/block-capture.cpp b/clang/test/CodeGenCXX/block-capture.cpp
index 3d55a235ae559..0b193354d878d 100644
--- a/clang/test/CodeGenCXX/block-capture.cpp
+++ b/clang/test/CodeGenCXX/block-capture.cpp
@@ -1,5 +1,6 @@
-// RUN: %clang_cc1 -triple arm64e-apple-ios -x c++ -std=c++20 -fblocks -Wimplicit-block-var-alloc -verify -emit-llvm %s -o - | \
-// RUN:    FileCheck --implicit-check-not "call{{.*}}_ZN3Big" %s
+// RUN: %clang_cc1 -triple arm64e-apple-ios -x c++ -std=c++20 -fblocks -Wimplicit-block-var-alloc -fexceptions \
+// RUN:            -verify -emit-llvm %s -o - | \
+// RUN:    FileCheck --implicit-check-not "call{{.*}}_ZN3BigD" %s
 
 // CHECK: %struct.__block_byref_baz = type { ptr, ptr, i32, i32, i32 }
 // CHECK: [[baz:%[0-9a-z_]*]] = alloca %struct.__block_byref_baz
@@ -26,27 +27,36 @@ class Big {
 };
 Big getBig(Big * (^)());
 
+void myCleanup(void *);
+
 // CHECK: define void @_Z11heapInitBigv
+// CHECK: store i8 0, ptr {{.*}}initialized
 // CHECK: call void @_Block_object_assign(ptr {{.*}}, ptr {{.*}}, i32 8)
 // CHECK: call void @_Block_object_dispose(ptr {{.*}}, i32 8)
-// CHECK: %B1 = getelementptr inbounds %struct.__block_byref_B, ptr %{{.*}}, i32 0, i32 6
-// CHECK: call void @_Z6getBigU13block_pointerFP3BigvE({{[^,]*}} %B1,
+// CHECK: %B11 = getelementptr inbounds %struct.__block_byref_B1, ptr %{{.*}}, i32 0, i32 
+// CHECK: invoke void @_Z6getBigU13block_pointerFP3BigvE({{[^,]*}} %B11,
+//
+// CHECK: invoke.cont:
+// CHECK: store i1 true, ptr {{.*}}initialized
 // CHECK: call void @_Block_object_dispose
 // (no call to destructor, enforced by --implicit-check-not)
-
+//
 // CHECK: define internal void @__Block_byref_object_copy_
 // (no call to copy constructor, enforced by --implicit-check-not)
-
+//
 // CHECK: define internal void @__Block_byref_object_dispose_
+// CHECK: load {{.*}}initialized
+// CHECK: br {{.*}}label %skip
 // CHECK: call {{.*}} @_ZN3BigD1Ev(
-
+// CHECK: skip:
+//
 void heapInitBig() {
-  // expected-warning at +1{{variable 'B' will be initialized in a heap allocation}}
-  __block Big B = ({ // Make sure this works with statement expressions.
+  // expected-warning at +1{{variable 'B1' will be initialized in a heap allocation}}
+  __block Big B1 = ({ // Make sure this works with statement expressions.
     getBig(
         // expected-note at +1{{because it is captured by a block used in its own initializer}}
         ^{
-          return &B;
+          return &B1;
         });
   });
 }
@@ -59,8 +69,9 @@ extern Small getSmall(const void * (^)(), const void * (^)());
 extern Small getSmall(__SIZE_TYPE__);
 extern int getInt(const void *(^)());
 
-// CHECK: %S11 = getelementptr inbounds %struct.__block_byref_S1, ptr %{{.*}}, i32 0, i32 4
-// CHECK: [[call:%[0-9a-z_\.]*]] = call i64 @_Z8getSmallU13block_pointerFPKvvE(
+// CHECK: define {{.*}} @_Z19heapInitSmallAtomicv
+// CHECK: %S11 = getelementptr inbounds %struct.__block_byref_S1, ptr %{{.*}}, i32 0, i32
+// CHECK: [[call:%[0-9a-z_\.]*]] = invoke i64 @_Z8getSmallU13block_pointerFPKvvE(
 // CHECK: [[dive:%[0-9a-z_\.]*]] = getelementptr inbounds %struct.Small, ptr %S11, i32 0, i32 0
 // CHECK: store i64 [[call]], ptr [[dive]], align 8
 
@@ -88,6 +99,52 @@ void heapInitTwoSmall() {
       });
 }
 
+
+// Test __attribute__((cleanup)) with direct init on heap.  We expect this to
+// first call the cleanup function on the heap allocation, then call
+// _Block_object_dispose (which runs the destructor and frees the allocation).
+//
+// CHECK: define {{.*}} @_Z19heapInitWithCleanupv
+// CHECK: store i1 true, ptr {{.*}}initialized
+// CHECK: [[fwd:%.*]] = load ptr, ptr %forwarding
+// CHECK: [[obj:%.*]] = getelementptr inbounds %struct.__block_byref_B2, ptr [[fwd]], i32 0, i32 8
+// CHECK: invoke void @_Z9myCleanupPv({{.*}}[[obj]])
+// CHECK-NOT: ret
+// CHECK: call void @_Block_object_dispose(ptr %
+void heapInitWithCleanup() {
+  // expected-warning at +1{{variable 'B2' will be initialized in a heap allocation}}
+  __attribute__((cleanup(myCleanup))) __block Big B2 = getBig(
+      // expected-note at +1{{because it is captured by a block used in its own initializer}}
+      ^{
+        return &B2;
+      });
+}
+
+// Same as above but without direct init on heap; therefore, the stack copy
+// needs to be destroyed in addition to the above operations.  This must happen
+// after the cleanup call (and also happens after _Block_object_dispose, but
+// doesn't need to).
+//
+// CHECK: define {{.*}} @_Z22nonHeapInitWithCleanupv()
+// CHECK: [[fwd:%.*]] = load ptr, ptr %forwarding
+// CHECK: [[obj:%.*]] = getelementptr inbounds %struct.__block_byref_B3, ptr [[fwd]], i32 0, i32
+// CHECK: invoke void @_Z9myCleanupPv({{.*}}[[obj]])
+//
+// CHECK: invoke.cont:
+// CHECK: call void @_Block_object_dispose
+// CHECK: call {{.*}} @_ZN3BigD1Ev(
+//
+// CHECK: lpad:
+// CHECK: call void @_Block_object_dispose
+// CHECK: call {{.*}} @_ZN3BigD1Ev(
+//
+// CHECK: define internal void @__Block_byref_object_dispose_
+// CHECK: call {{.*}} @_ZN3BigD1Ev(
+void nonHeapInitWithCleanup() {
+  __attribute__((cleanup(myCleanup))) __block Big B3 = getBig(nullptr);
+  (void)^{ return &B3; };
+}
+
 // This used to cause an ICE because the type of the variable makes it eligible
 // for constant initialization (but the actual initializer doesn't).
 //



More information about the cfe-commits mailing list