[llvm] 703240c - [SROA] Maintain shadow/backing alloca when some slices are noncapturnig read-only calls to allow alloca partitioning/promotion

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 4 10:09:24 PST 2022


Author: Roman Lebedev
Date: 2022-03-04T21:08:43+03:00
New Revision: 703240c71fd640af7490069e8149d32d78d14da1

URL: https://github.com/llvm/llvm-project/commit/703240c71fd640af7490069e8149d32d78d14da1
DIFF: https://github.com/llvm/llvm-project/commit/703240c71fd640af7490069e8149d32d78d14da1.diff

LOG: [SROA] Maintain shadow/backing alloca when some slices are noncapturnig read-only calls to allow alloca partitioning/promotion

This is inspired by the original variant of D109749 by Graham Hunter,
but is a more general version.

Roughly, instead of promoting the alloca, we call it
a shadow/backing alloca, go through all it's slices,
clone(!) instructions that operated on it,
but make them operate on the cloned alloca,
and promote cloned alloca instead.

This keeps the shadow/backing alloca, and all the original instructions
around, which results in said shadow/backing alloca being
a perfect mirror/representation of the promoted alloca's content,
so calls that take the alloca as arguments (non-capturingly!)
can be supported.

For now, we require that the calls also don't modify the alloca's content,
but that is only to simplify the initial implementation,
and that will be supported in a follow-up.

Overall, this leads to *smaller* codesize:
https://llvm-compile-time-tracker.com/compare.php?from=a8b4f5bbab62091835205f3d648902432a4a5b58&to=aeae054055b125b011c1122f82c86457e159436f&stat=size-total
and is roughly neutral compile-time wise:
https://llvm-compile-time-tracker.com/compare.php?from=a8b4f5bbab62091835205f3d648902432a4a5b58&to=aeae054055b125b011c1122f82c86457e159436f&stat=instructions

Reviewed By: djtodoro

Differential Revision: https://reviews.llvm.org/D113520

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/SROA.cpp
    llvm/test/Transforms/SROA/non-capturing-call-readonly.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index eb56fea1a20bf..5c3a46c4903a0 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -167,6 +167,7 @@ class Slice {
   void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
 
   Use *getUse() const { return UseAndIsSplittable.getPointer(); }
+  void setUse(Use *U) { UseAndIsSplittable.setPointer(U); }
 
   bool isDead() const { return getUse() == nullptr; }
   void kill() { UseAndIsSplittable.setPointer(nullptr); }
@@ -218,7 +219,7 @@ class Slice {
 class llvm::sroa::AllocaSlices {
 public:
   /// Construct the slices of a particular alloca.
-  AllocaSlices(const DataLayout &DL, AllocaInst &AI);
+  AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed);
 
   /// Test whether a pointer to the allocation escapes our analysis.
   ///
@@ -270,6 +271,12 @@ class llvm::sroa::AllocaSlices {
     return DeadUseIfPromotable;
   }
 
+  void forgetTheDead() {
+    DeadUsers.clear();
+    DeadUseIfPromotable.clear();
+    DeadOperands.clear();
+  };
+
   /// Access the dead operands referring to this alloca.
   ///
   /// These are operands which have cannot actually be used to refer to the
@@ -295,11 +302,18 @@ class llvm::sroa::AllocaSlices {
 
   friend class AllocaSlices::SliceBuilder;
 
+  void formBackingAlloca(AllocaInst *AI, bool &Changed);
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Handle to alloca instruction to simplify method interfaces.
   AllocaInst &AI;
 #endif
 
+  /// Certain escaping uses of an alloca (non-capturing-ones)
+  /// do not prevent promotion, but force retention of the alloca.
+  /// This records if there are any such uses.
+  bool NeedsBackingAlloca = false;
+
   /// The instruction responsible for this alloca not having a known set
   /// of slices.
   ///
@@ -1062,11 +1076,22 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
 
   void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
 
+  void visitCallBase(CallBase &CB) {
+    if (!IsOffsetKnown || !CB.doesNotCapture(U->getOperandNo()))
+      return PI.setAborted(&CB);
+    // If we know that the callee does not retain the pointer,
+    // then it does not prevent SROA, although we have to workaround this.
+    // However, for now, only allow uses, that, at most, read from said memory.
+    if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(U->getOperandNo()))
+      return PI.setAborted(&CB);
+    AS.NeedsBackingAlloca = true;
+  }
+
   /// Disable SROA entirely if there are unhandled users of the alloca.
   void visitInstruction(Instruction &I) { PI.setAborted(&I); }
 };
 
-AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
+AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed)
     :
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
       AI(AI),
@@ -1083,6 +1108,10 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
     return;
   }
 
+  // We may have found that the pointer to the AI escapes, but isn't captured.
+  if (NeedsBackingAlloca)
+    formBackingAlloca(&AI, Changed);
+
   llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
 
   // Sort the uses. This arranges for the offsets to be in ascending order,
@@ -3587,6 +3616,122 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
 
 } // end anonymous namespace
 
+/// Apparently, we can promote the alloca, but some uses of the alloca
+/// are calls (that don't capture it's address), which require for the
+/// trace alloca to remain. To do so, we must form a new "backing" alloca,
+/// which will be kept as an up-to-date backup of the to-be-promoted-alloca's
+/// content, and used in it's place in these non-capturing calls.
+/// FIXME: support non-readonly non-capturing calls.
+void AllocaSlices::formBackingAlloca(AllocaInst *AllocaToPromote,
+                                     bool &Changed) {
+  assert(NeedsBackingAlloca &&
+         "Should not be called if there is no need to rewrite.");
+
+  // We are going to preserve all of the original instructions that were
+  // operating on the original alloca, so we must forget any instructions
+  // that were deemed as dead-to-be-deleted during normal promotion.
+  forgetTheDead();
+
+  Changed = true;
+
+  // Now, we want to retain all of the instructions operating on the original
+  // alloca, so to avoid much hassle, create a new alloca, and swap (RAUW) them.
+  AllocaInst *ShadowAlloca = cast<AllocaInst>(AllocaToPromote->clone());
+  ShadowAlloca->takeName(AllocaToPromote);
+  AllocaToPromote->setName(ShadowAlloca->getName() + ".prom");
+  ShadowAlloca->insertBefore(AllocaToPromote);
+  AllocaToPromote->replaceAllUsesWith(ShadowAlloca);
+
+  // Avoid recomputing the same pointer over and over again, cache it.
+  SmallDenseMap<std::pair<uint64_t, Type *>, Value *> RebasedPtrsCSE;
+
+  // Don't do anything fancy, just put new insts "right after" the alloca.
+  IRBuilderTy Builder(AllocaToPromote->getContext());
+  BasicBlock *AllocaToPromoteBB = AllocaToPromote->getParent();
+  Builder.SetInsertPoint(AllocaToPromoteBB,
+                         AllocaToPromoteBB->getFirstInsertionPt());
+
+  // Give a pointer `Offset` bytes into the `AllocaToPromote` with `PtrTy` type.
+  auto getRebasedPtr = [&RebasedPtrsCSE, &Builder, AllocaToPromote,
+                        DL = AllocaToPromote->getModule()->getDataLayout()](
+                           PointerType *PtrTy, const uint64_t Offset) {
+    // Look it up in a cache first.
+    auto It = RebasedPtrsCSE.find({Offset, PtrTy});
+    if (It != RebasedPtrsCSE.end())
+      return It->second;
+
+    // Otherwise, create a new pointer, and cache it for the future.
+    Value *NewPtr = getAdjustedPtr(
+        Builder, DL, AllocaToPromote,
+        APInt(DL.getIndexSizeInBits(PtrTy->getAddressSpace()), Offset), PtrTy,
+        "");
+    RebasedPtrsCSE[{Offset, PtrTy}] = NewPtr;
+
+    return NewPtr;
+  };
+
+  // Some instructions may have several uses of an alloca, and there's
+  // a separate slice for each use, so we must cache each instruction
+  // we clone, so that we only clone it once,
+  // not for each slice that references it.
+  SmallDenseMap<Instruction *, Instruction *> InstrCloneMap;
+
+  // Now, let's just deal with each slice. Roughly, we need to clone each
+  // instruction that is referenced by a slice (once per instruction!),
+  // and change the appropriate pointer from pointing at the shadow alloca
+  // into pointing into the alloca we are going to promote.
+  //
+  // NOTE: the original instruction is generally preserved,
+  // because we need to maintain the content parity between the two allocas!
+  for (Slice &S : Slices) {
+    // Just completely ignore dead slices.
+    if (S.isDead())
+      continue;
+
+    // Which instruction does this slice represent?
+    Use *OrigUse = S.getUse();
+    auto *OrigInstr = cast<Instruction>(OrigUse->getUser());
+
+    // Now, we need to make a clone of this instruction, but operating on
+    // the alloca-to-be-promoted instead.
+    Instruction *ClonedInstr;
+    // Only clone instruction once! See if we already did that for this instr.
+    auto It = InstrCloneMap.find(OrigInstr);
+    if (It != InstrCloneMap.end())
+      ClonedInstr = It->second;
+    else {
+      // This is the first time this instruction is seen.
+      // Clone it next to the original instruction, and cache it.
+      ClonedInstr = OrigInstr->clone();
+      ClonedInstr->insertBefore(OrigInstr);
+      InstrCloneMap.insert({OrigInstr, ClonedInstr});
+
+      // Also, if the instruction was returning anything, we do that instead.
+      if (!ClonedInstr->getType()->isVoidTy()) {
+        assert(isa<LoadInst>(OrigInstr) &&
+               "Not expecting to encounter here anything other than a `load`.");
+        ClonedInstr->setName(OrigInstr->getName() + ".prom");
+        OrigInstr->replaceAllUsesWith(ClonedInstr);
+      }
+
+      if (isa<LoadInst>(OrigInstr))
+        // We know that all the offending (non-capturing) calls do not modify
+        // the content of the shadow alloca, so we do not need to propagate
+        // the content of the shadow alloca to the alloca-to-be-promoted.
+        DeadUsers.push_back(OrigInstr);
+    }
+
+    // Final touch: the slice should refer to the
+    // use of the alloca-to-be-promoted, while it currently refers to
+    // use of the shadow alloca, so rectify that.
+    Value *NewPtr = getRebasedPtr(cast<PointerType>(OrigUse->get()->getType()),
+                                  S.beginOffset());
+    Use &ClonedUse = ClonedInstr->getOperandUse(OrigUse->getOperandNo());
+    ClonedUse.set(NewPtr);
+    S.setUse(&ClonedUse);
+  }
+}
+
 /// Strip aggregate type wrapping.
 ///
 /// This removes no-op aggregate types wrapping an underlying type. It will
@@ -4612,7 +4757,7 @@ bool SROAPass::runOnAlloca(AllocaInst &AI) {
   Changed |= AggRewriter.rewrite(AI);
 
   // Build the slices using a recursive instruction-visiting builder.
-  AllocaSlices AS(DL, AI);
+  AllocaSlices AS(DL, AI, Changed);
   LLVM_DEBUG(AS.print(dbgs()));
   if (AS.isEscaped())
     return Changed;

diff  --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
index c3c8fb4f7f43b..2e27d13122ce8 100644
--- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
+++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll
@@ -9,19 +9,18 @@ define i32 @alloca_used_in_call(i32* %data, i64 %n) {
 ; CHECK-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca(i32* [[RETVAL]])
-; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    ret i32 [[I1]]
+; CHECK-NEXT:    ret i32 [[RDX_INC]]
 ;
 ; CHECK-OPAQUE-LABEL: @alloca_used_in_call(
 ; CHECK-OPAQUE-NEXT:  entry:
@@ -29,19 +28,18 @@ define i32 @alloca_used_in_call(i32* %data, i64 %n) {
 ; CHECK-OPAQUE-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-OPAQUE-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]])
-; CHECK-OPAQUE-NEXT:    [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    ret i32 [[I1]]
+; CHECK-OPAQUE-NEXT:    ret i32 [[RDX_INC]]
 ;
 entry:
   %retval = alloca i32, align 4
@@ -198,19 +196,18 @@ define i32 @alloca_not_captured_and_readonly_as_per_operand_attr(i32* %data, i64
 ; CHECK-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @capture_of_alloca(i32* nocapture readonly [[RETVAL]])
-; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    ret i32 [[I1]]
+; CHECK-NEXT:    ret i32 [[RDX_INC]]
 ;
 ; CHECK-OPAQUE-LABEL: @alloca_not_captured_and_readonly_as_per_operand_attr(
 ; CHECK-OPAQUE-NEXT:  entry:
@@ -218,19 +215,18 @@ define i32 @alloca_not_captured_and_readonly_as_per_operand_attr(i32* %data, i64
 ; CHECK-OPAQUE-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-OPAQUE-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @capture_of_alloca(ptr nocapture readonly [[RETVAL]])
-; CHECK-OPAQUE-NEXT:    [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    ret i32 [[I1]]
+; CHECK-OPAQUE-NEXT:    ret i32 [[RDX_INC]]
 ;
 entry:
   %retval = alloca i32, align 4
@@ -261,19 +257,18 @@ define i32 @alloca_not_captured_as_per_operand_attr_and_readonly_as_per_callbase
 ; CHECK-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @capture_of_alloca(i32* nocapture [[RETVAL]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    ret i32 [[I1]]
+; CHECK-NEXT:    ret i32 [[RDX_INC]]
 ;
 ; CHECK-OPAQUE-LABEL: @alloca_not_captured_as_per_operand_attr_and_readonly_as_per_callbase_attr(
 ; CHECK-OPAQUE-NEXT:  entry:
@@ -281,19 +276,18 @@ define i32 @alloca_not_captured_as_per_operand_attr_and_readonly_as_per_callbase
 ; CHECK-OPAQUE-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-OPAQUE-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @capture_of_alloca(ptr nocapture [[RETVAL]]) #[[ATTR2:[0-9]+]]
-; CHECK-OPAQUE-NEXT:    [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    ret i32 [[I1]]
+; CHECK-OPAQUE-NEXT:    ret i32 [[RDX_INC]]
 ;
 entry:
   %retval = alloca i32, align 4
@@ -387,11 +381,11 @@ define i32 @alloca_with_gep_used_in_call(i32* %data, i64 %n) {
 ; CHECK-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -399,8 +393,7 @@ define i32 @alloca_with_gep_used_in_call(i32* %data, i64 %n) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[RETVAL]], i32 0
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca(i32* [[GEP]])
-; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    ret i32 [[I1]]
+; CHECK-NEXT:    ret i32 [[RDX_INC]]
 ;
 ; CHECK-OPAQUE-LABEL: @alloca_with_gep_used_in_call(
 ; CHECK-OPAQUE-NEXT:  entry:
@@ -408,11 +401,11 @@ define i32 @alloca_with_gep_used_in_call(i32* %data, i64 %n) {
 ; CHECK-OPAQUE-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -420,8 +413,7 @@ define i32 @alloca_with_gep_used_in_call(i32* %data, i64 %n) {
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[RETVAL]], i32 0
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca(ptr [[GEP]])
-; CHECK-OPAQUE-NEXT:    [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    ret i32 [[I1]]
+; CHECK-OPAQUE-NEXT:    ret i32 [[RDX_INC]]
 ;
 entry:
   %retval = alloca i32, align 4
@@ -516,11 +508,11 @@ define i32 @alloca_used_in_maybe_throwing_call(i32* %data, i64 %n) personality i
 ; CHECK-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -535,8 +527,7 @@ define i32 @alloca_used_in_maybe_throwing_call(i32* %data, i64 %n) personality i
 ; CHECK-NEXT:    catch i8* null
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    ret i32 [[I2]]
+; CHECK-NEXT:    ret i32 [[RDX_INC]]
 ;
 ; CHECK-OPAQUE-LABEL: @alloca_used_in_maybe_throwing_call(
 ; CHECK-OPAQUE-NEXT:  entry:
@@ -544,11 +535,11 @@ define i32 @alloca_used_in_maybe_throwing_call(i32* %data, i64 %n) personality i
 ; CHECK-OPAQUE-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -563,8 +554,7 @@ define i32 @alloca_used_in_maybe_throwing_call(i32* %data, i64 %n) personality i
 ; CHECK-OPAQUE-NEXT:    catch ptr null
 ; CHECK-OPAQUE-NEXT:    br label [[END]]
 ; CHECK-OPAQUE:       end:
-; CHECK-OPAQUE-NEXT:    [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    ret i32 [[I2]]
+; CHECK-OPAQUE-NEXT:    ret i32 [[RDX_INC]]
 ;
 entry:
   %retval = alloca i32, align 4
@@ -604,11 +594,11 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(i32* %data, i64 %
 ; CHECK-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -621,8 +611,7 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(i32* %data, i64 %
 ; CHECK-NEXT:    catch i8* null
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    ret i32 [[I2]]
+; CHECK-NEXT:    ret i32 [[RDX_INC]]
 ;
 ; CHECK-OPAQUE-LABEL: @alloca_used_in_maybe_throwing_call_with_same_dests(
 ; CHECK-OPAQUE-NEXT:  entry:
@@ -630,11 +619,11 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(i32* %data, i64 %
 ; CHECK-OPAQUE-NEXT:    store i32 0, ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_PROM_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_PROM_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -647,8 +636,7 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(i32* %data, i64 %
 ; CHECK-OPAQUE-NEXT:    catch ptr null
 ; CHECK-OPAQUE-NEXT:    br label [[END]]
 ; CHECK-OPAQUE:       end:
-; CHECK-OPAQUE-NEXT:    [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    ret i32 [[I2]]
+; CHECK-OPAQUE-NEXT:    ret i32 [[RDX_INC]]
 ;
 entry:
   %retval = alloca i32, align 4
@@ -689,23 +677,19 @@ define [2 x i32] @part_of_alloca_used_in_call(i32* %data, i64 %n) {
 ; CHECK-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca(i32* [[RETVAL]])
-; CHECK-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 ; CHECK-OPAQUE-LABEL: @part_of_alloca_used_in_call(
@@ -718,23 +702,19 @@ define [2 x i32] @part_of_alloca_used_in_call(i32* %data, i64 %n) {
 ; CHECK-OPAQUE-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-OPAQUE-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]])
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-OPAQUE-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 entry:
@@ -772,23 +752,19 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args(i32* %data
 ; CHECK-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* [[RETVAL]], i32* [[RETVAL_BASE]])
-; CHECK-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 ; CHECK-OPAQUE-LABEL: @all_parts_of_alloca_used_in_call_with_multiple_args(
@@ -802,23 +778,19 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args(i32* %data
 ; CHECK-OPAQUE-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-OPAQUE-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL_BASE]])
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-OPAQUE-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 entry:
@@ -857,11 +829,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_FULL_PROM_SROA_3_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_3_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -869,12 +841,8 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK:       exit:
 ; CHECK-NEXT:    call void @llvm.memcpy.p0i32.p0i32.i32(i32* [[RETVAL_BASE]], i32* [[RETVAL]], i32 4, i1 false)
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* [[RETVAL]], i32* [[RETVAL_BASE]])
-; CHECK-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[RDX_INC]], 0
+; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 ; CHECK-OPAQUE-LABEL: @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcpy_before_call(
@@ -888,11 +856,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK-OPAQUE-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_FULL_PROM_SROA_3_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_3_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -900,12 +868,8 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[RETVAL_BASE]], ptr [[RETVAL]], i32 4, i1 false)
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL_BASE]])
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[RDX_INC]], 0
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-OPAQUE-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 entry:
@@ -945,11 +909,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_FULL_PROM_SROA_3_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_3_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -957,12 +921,8 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* [[RETVAL]], i32* [[RETVAL_BASE]])
 ; CHECK-NEXT:    call void @llvm.memcpy.p0i32.p0i32.i32(i32* [[RETVAL_BASE]], i32* [[RETVAL]], i32 4, i1 false)
-; CHECK-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[RDX_INC]], 0
+; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 ; CHECK-OPAQUE-LABEL: @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcpy_after_call(
@@ -976,11 +936,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK-OPAQUE-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_FULL_PROM_SROA_3_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_3_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -988,12 +948,8 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args_with_memcp
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL_BASE]])
 ; CHECK-OPAQUE-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr [[RETVAL_BASE]], ptr [[RETVAL]], i32 4, i1 false)
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[RDX_INC]], 0
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-OPAQUE-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 entry:
@@ -1032,23 +988,19 @@ define [2 x i32] @part_of_alloca_used_in_call_with_multiple_args(i32* %data, i64
 ; CHECK-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* [[RETVAL]], i32* [[RETVAL]])
-; CHECK-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4
-; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 ; CHECK-OPAQUE-LABEL: @part_of_alloca_used_in_call_with_multiple_args(
@@ -1061,23 +1013,19 @@ define [2 x i32] @part_of_alloca_used_in_call_with_multiple_args(i32* %data, i64
 ; CHECK-OPAQUE-NEXT:    [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
 ; CHECK-OPAQUE-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK-OPAQUE:       exit:
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL]])
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-OPAQUE-NEXT:    [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-OPAQUE-NEXT:    ret [2 x i32] [[I1_FCA_1_INSERT]]
 ;
 entry:
@@ -1117,11 +1065,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(i32* %dat
 ; CHECK-NEXT:    [[SOME_ANOTHER_ALLOCA:%.*]] = getelementptr inbounds [42 x i32], [42 x i32]* [[SOME_ANOTHER_ALLOCA_FULL]], i64 0, i64 0
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4
-; CHECK-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-NEXT:    store i32 [[RDX_INC]], i32* [[RETVAL]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -1130,12 +1078,8 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(i32* %dat
 ; CHECK-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* [[RETVAL]], i32* [[RETVAL_BASE]])
 ; CHECK-NEXT:    [[I1:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* [[RETVAL_BASE]], i32* [[RETVAL]])
 ; CHECK-NEXT:    [[I2:%.*]] = call i32 @capture_of_alloca(i32* [[SOME_ANOTHER_ALLOCA]])
-; CHECK-NEXT:    [[I3_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-NEXT:    [[I3_FCA_0_LOAD:%.*]] = load i32, i32* [[I3_FCA_0_GEP]], align 4
-; CHECK-NEXT:    [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I3_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[I3_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-NEXT:    [[I3_FCA_1_LOAD:%.*]] = load i32, i32* [[I3_FCA_1_GEP]], align 4
-; CHECK-NEXT:    [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[I3_FCA_1_LOAD]], 1
+; CHECK-NEXT:    [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-NEXT:    [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-NEXT:    ret [2 x i32] [[I3_FCA_1_INSERT]]
 ;
 ; CHECK-OPAQUE-LABEL: @all_parts_of_alloca_used_in_calls_with_multiple_args(
@@ -1151,11 +1095,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(i32* %dat
 ; CHECK-OPAQUE-NEXT:    [[SOME_ANOTHER_ALLOCA:%.*]] = getelementptr inbounds [42 x i32], ptr [[SOME_ANOTHER_ALLOCA_FULL]], i64 0, i64 0
 ; CHECK-OPAQUE-NEXT:    br label [[LOOP:%.*]]
 ; CHECK-OPAQUE:       loop:
-; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[RETVAL_FULL_PROM_SROA_2_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ]
+; CHECK-OPAQUE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-OPAQUE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4
-; CHECK-OPAQUE-NEXT:    [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]]
+; CHECK-OPAQUE-NEXT:    [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_PROM_SROA_2_0]], [[LD]]
 ; CHECK-OPAQUE-NEXT:    store i32 [[RDX_INC]], ptr [[RETVAL]], align 4
 ; CHECK-OPAQUE-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
 ; CHECK-OPAQUE-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
@@ -1164,12 +1108,8 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(i32* %dat
 ; CHECK-OPAQUE-NEXT:    [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL_BASE]])
 ; CHECK-OPAQUE-NEXT:    [[I1:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL_BASE]], ptr [[RETVAL]])
 ; CHECK-OPAQUE-NEXT:    [[I2:%.*]] = call i32 @capture_of_alloca(ptr [[SOME_ANOTHER_ALLOCA]])
-; CHECK-OPAQUE-NEXT:    [[I3_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0
-; CHECK-OPAQUE-NEXT:    [[I3_FCA_0_LOAD:%.*]] = load i32, ptr [[I3_FCA_0_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I3_FCA_0_LOAD]], 0
-; CHECK-OPAQUE-NEXT:    [[I3_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1
-; CHECK-OPAQUE-NEXT:    [[I3_FCA_1_LOAD:%.*]] = load i32, ptr [[I3_FCA_1_GEP]], align 4
-; CHECK-OPAQUE-NEXT:    [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[I3_FCA_1_LOAD]], 1
+; CHECK-OPAQUE-NEXT:    [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0
+; CHECK-OPAQUE-NEXT:    [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[RDX_INC]], 1
 ; CHECK-OPAQUE-NEXT:    ret [2 x i32] [[I3_FCA_1_INSERT]]
 ;
 entry:
@@ -1204,6 +1144,7 @@ define i32 @all_uses_of_alloca_are_calls(i32* %data, i64 %n) {
 ; CHECK-LABEL: @all_uses_of_alloca_are_calls(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RETVAL_PROM:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @user_of_alloca(i32* [[RETVAL]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @user_of_alloca(i32* [[RETVAL]])
 ; CHECK-NEXT:    ret i32 0
@@ -1211,6 +1152,7 @@ define i32 @all_uses_of_alloca_are_calls(i32* %data, i64 %n) {
 ; CHECK-OPAQUE-LABEL: @all_uses_of_alloca_are_calls(
 ; CHECK-OPAQUE-NEXT:  entry:
 ; CHECK-OPAQUE-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-OPAQUE-NEXT:    [[RETVAL_PROM:%.*]] = alloca i32, align 4
 ; CHECK-OPAQUE-NEXT:    [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]])
 ; CHECK-OPAQUE-NEXT:    [[TMP1:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]])
 ; CHECK-OPAQUE-NEXT:    ret i32 0
@@ -1236,8 +1178,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAYDECAY]], i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @user_of_alloca(i32* [[ADD_PTR]])
-; CHECK-NEXT:    [[LD:%.*]] = load i64, i64* [[C]], align 4
-; CHECK-NEXT:    ret i64 [[LD]]
+; CHECK-NEXT:    ret i64 0
 ;
 ; CHECK-OPAQUE-LABEL: @do_schedule_instrs_for_dce_after_fixups(
 ; CHECK-OPAQUE-NEXT:  entry:
@@ -1250,8 +1191,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
 ; CHECK-OPAQUE:       if.end:
 ; CHECK-OPAQUE-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 1
 ; CHECK-OPAQUE-NEXT:    [[TMP1:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]])
-; CHECK-OPAQUE-NEXT:    [[LD:%.*]] = load i64, ptr [[C]], align 4
-; CHECK-OPAQUE-NEXT:    ret i64 [[LD]]
+; CHECK-OPAQUE-NEXT:    ret i64 0
 ;
 entry:
   %c = alloca i64, align 2
@@ -1294,15 +1234,13 @@ define i8 @dont_transform_load_only() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    call void @byte_user_of_alloca(i8* [[A]])
-; CHECK-NEXT:    [[R:%.*]] = load i8, i8* [[A]], align 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 undef
 ;
 ; CHECK-OPAQUE-LABEL: @dont_transform_load_only(
 ; CHECK-OPAQUE-NEXT:  entry:
 ; CHECK-OPAQUE-NEXT:    [[A:%.*]] = alloca i8, align 1
 ; CHECK-OPAQUE-NEXT:    call void @byte_user_of_alloca(ptr [[A]])
-; CHECK-OPAQUE-NEXT:    [[R:%.*]] = load i8, ptr [[A]], align 1
-; CHECK-OPAQUE-NEXT:    ret i8 [[R]]
+; CHECK-OPAQUE-NEXT:    ret i8 undef
 ;
 entry:
   %a = alloca i8
@@ -1316,16 +1254,14 @@ define i8 @transform_load_and_store() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    store i8 0, i8* [[A]], align 1
 ; CHECK-NEXT:    call void @byte_user_of_alloca(i8* [[A]])
-; CHECK-NEXT:    [[R:%.*]] = load i8, i8* [[A]], align 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
 ; CHECK-OPAQUE-LABEL: @transform_load_and_store(
 ; CHECK-OPAQUE-NEXT:  entry:
 ; CHECK-OPAQUE-NEXT:    [[A:%.*]] = alloca i8, align 1
 ; CHECK-OPAQUE-NEXT:    store i8 0, ptr [[A]], align 1
 ; CHECK-OPAQUE-NEXT:    call void @byte_user_of_alloca(ptr [[A]])
-; CHECK-OPAQUE-NEXT:    [[R:%.*]] = load i8, ptr [[A]], align 1
-; CHECK-OPAQUE-NEXT:    ret i8 [[R]]
+; CHECK-OPAQUE-NEXT:    ret i8 0
 ;
 entry:
   %a = alloca i8


        


More information about the llvm-commits mailing list