[llvm] 713c70d - [VPlan] Handle regions with live-outs and scalar VF when replicating. (#186252)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 05:23:29 PDT 2026
Author: Florian Hahn
Date: 2026-03-30T13:23:23+01:00
New Revision: 713c70d7ef50400e3533be05b4f08dc675dc012b
URL: https://github.com/llvm/llvm-project/commit/713c70d7ef50400e3533be05b4f08dc675dc012b
DIFF: https://github.com/llvm/llvm-project/commit/713c70d7ef50400e3533be05b4f08dc675dc012b.diff
LOG: [VPlan] Handle regions with live-outs and scalar VF when replicating. (#186252)
Extend initial unrolling of replicate regions
(https://github.com/llvm/llvm-project/pull/170212) to support live-outs,
if the VF is scalar.
This allows adding the logic needed to explicitly unroll, and replacing
VPPredInstPHIRecipes with regular scalar VPPhis, without yet having to worry
about packing values into vector phis. This will be done in a follow-up
change, which means all replicate regions will be fully dissolved.
PR: https://github.com/llvm/llvm-project/pull/186252
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
llvm/test/Transforms/LoopVectorize/VPlan/dissolve-replicate-regions.ll
llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
llvm/test/Transforms/LoopVectorize/cast-induction.ll
llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 867c7c8ef5045..50b05a9e9c0fd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;
@@ -669,27 +670,33 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
/// VPReplicateRecipes are converted to single-scalar ones, branch-on-mask is
/// converted into BranchOnCond and extracts are created as needed.
static void convertRecipesInRegionBlocksToSingleScalar(VPlan &Plan, Type *IdxTy,
- VPBlockBase *Entry) {
+ VPBlockBase *Entry,
+ ElementCount VF) {
VPValue *Idx0 = Plan.getZero(IdxTy);
+ VPTypeAnalysis TypeInfo(Plan);
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry)) {
for (VPRecipeBase &OldR : make_early_inc_range(cast<VPBasicBlock>(*VPB))) {
VPBuilder Builder(&OldR);
assert(!match(&OldR, m_ExtractElement(m_VPValue(), m_VPValue())) &&
"must not contain extracts before conversion");
- for (const auto &[I, Op] : enumerate(OldR.operands())) {
- // Skip operands that don't need extraction: values defined in the
- // same block (already scalar), or values that are already single
- // scalars.
- auto *DefR = Op->getDefiningRecipe();
- if ((isa_and_present<VPScalarIVStepsRecipe>(DefR) &&
- DefR->getParent() == VPB) ||
- vputils::isSingleScalar(Op))
- continue;
- // Extract lane zero from values defined outside the region.
- VPValue *Extract = Builder.createNaryOp(Instruction::ExtractElement,
- {Op, Idx0}, OldR.getDebugLoc());
- OldR.setOperand(I, Extract);
+ // For scalar VF, operands are already scalar; no extraction needed.
+ if (!VF.isScalar()) {
+ for (const auto &[I, Op] : enumerate(OldR.operands())) {
+ // Skip operands that don't need extraction: values defined in the
+ // same block (already scalar), or values that are already single
+ // scalars.
+ auto *DefR = Op->getDefiningRecipe();
+ if ((isa_and_present<VPScalarIVStepsRecipe>(DefR) &&
+ DefR->getParent() == VPB) ||
+ vputils::isSingleScalar(Op))
+ continue;
+
+ // Extract lane zero from values defined outside the region.
+ VPValue *Extract = Builder.createNaryOp(
+ Instruction::ExtractElement, {Op, Idx0}, OldR.getDebugLoc());
+ OldR.setOperand(I, Extract);
+ }
}
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&OldR)) {
@@ -705,6 +712,15 @@ static void convertRecipesInRegionBlocksToSingleScalar(VPlan &Plan, Type *IdxTy,
{BranchOnMask->getOperand(0)},
BranchOnMask->getDebugLoc());
BranchOnMask->eraseFromParent();
+ } else if (auto *PredPhi = dyn_cast<VPPredInstPHIRecipe>(&OldR)) {
+ VPValue *PredOp = PredPhi->getOperand(0);
+ Type *PredTy = TypeInfo.inferScalarType(PredOp);
+ VPValue *PoisonVal = Plan.getOrAddLiveIn(PoisonValue::get(PredTy));
+
+ VPPhi *NewPhi = Builder.createScalarPhi({PoisonVal, PredOp},
+ PredPhi->getDebugLoc());
+ PredPhi->replaceAllUsesWith(NewPhi);
+ PredPhi->eraseFromParent();
} else {
assert((isa<VPScalarIVStepsRecipe>(OldR) ||
(isa<VPInstruction>(OldR) &&
@@ -768,7 +784,7 @@ static void dissolveReplicateRegion(VPRegionBlock *Region, ElementCount VF,
// Process the original blocks for lane 0: converting their recipes to
// single-scalar.
- convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, FirstLaneEntry);
+ convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, FirstLaneEntry, VF);
// Clone converted blocks for remaining lanes and process each in reverse
// order, connecting each lane's Exiting block to the subsequent lane's entry.
@@ -802,9 +818,10 @@ static void replicateReplicateRegionsByVF(VPlan &Plan, ElementCount VF,
SmallVector<VPRegionBlock *> ReplicateRegions;
for (VPRegionBlock *Region : VPBlockUtils::blocksOnly<VPRegionBlock>(
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
- // Skip regions with live-outs as packing scalar results back into vectors
- // is not yet implemented.
- if (Region->isReplicator() && Region->getExitingBasicBlock()->empty())
+ // Skip regions with live-outs when vectorizing as packing scalar results
+ // back into vectors is not yet implemented.
+ if (Region->isReplicator() &&
+ (VF.isScalar() || Region->getExitingBasicBlock()->empty()))
ReplicateRegions.push_back(Region);
}
@@ -819,12 +836,16 @@ static void replicateReplicateRegionsByVF(VPlan &Plan, ElementCount VF,
}
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
- if (Plan.hasScalarVFOnly())
- return;
-
Type *IdxTy = IntegerType::get(
Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);
+ if (Plan.hasScalarVFOnly()) {
+ // When Plan is only unrolled by UF, replicating by VF amounts to dissolving
+ // replicate regions.
+ replicateReplicateRegionsByVF(Plan, VF, IdxTy);
+ return;
+ }
+
// Visit all VPBBs outside the loop region and directly inside the top-level
// loop region.
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/dissolve-replicate-regions.ll b/llvm/test/Transforms/LoopVectorize/VPlan/dissolve-replicate-regions.ll
index a9b3f7a72dfbc..cb046214051ef 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/dissolve-replicate-regions.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/dissolve-replicate-regions.ll
@@ -23,43 +23,28 @@ define void @predicated_load(i1 %c, ptr %ptr, ptr %dst) {
; SCALAR-NEXT: EMIT vp<[[VP3:%[0-9]+]]> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; SCALAR-NEXT: vp<[[VP4:%[0-9]+]]> = SCALAR-STEPS vp<[[VP3]]>, ir<1>, vp<[[VP0]]>
; SCALAR-NEXT: vp<[[VP5:%[0-9]+]]> = SCALAR-STEPS vp<[[VP3]]>, ir<1>, vp<[[VP0]]>, vp<[[VP0]]>
-; SCALAR-NEXT: Successor(s): pred.load
+; SCALAR-NEXT: EMIT branch-on-cond ir<%c>
+; SCALAR-NEXT: Successor(s): pred.load.if, pred.load.continue
; SCALAR-EMPTY:
-; SCALAR-NEXT: <xVFxUF> pred.load: {
-; SCALAR-NEXT: pred.load.entry:
-; SCALAR-NEXT: BRANCH-ON-MASK ir<%c>
-; SCALAR-NEXT: Successor(s): pred.load.if, pred.load.continue
+; SCALAR-NEXT: pred.load.if:
+; SCALAR-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[VP4]]>
+; SCALAR-NEXT: CLONE ir<%lv> = load ir<%gep>
+; SCALAR-NEXT: Successor(s): pred.load.continue
; SCALAR-EMPTY:
-; SCALAR-NEXT: pred.load.if:
-; SCALAR-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[VP4]]>
-; SCALAR-NEXT: CLONE ir<%lv> = load ir<%gep>
-; SCALAR-NEXT: Successor(s): pred.load.continue
+; SCALAR-NEXT: pred.load.continue:
+; SCALAR-NEXT: EMIT-SCALAR vp<[[VP7:%[0-9]+]]> = phi [ ir<poison>, vector.body ], [ ir<%lv>, pred.load.if ]
+; SCALAR-NEXT: EMIT branch-on-cond ir<%c>
+; SCALAR-NEXT: Successor(s): pred.load.if, pred.load.continue
; SCALAR-EMPTY:
-; SCALAR-NEXT: pred.load.continue:
-; SCALAR-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[VP6:%[0-9]+]]> = ir<%lv>
-; SCALAR-NEXT: No successors
-; SCALAR-NEXT: }
-; SCALAR-NEXT: Successor(s): pred.load
+; SCALAR-NEXT: pred.load.if:
+; SCALAR-NEXT: CLONE ir<%gep>.1 = getelementptr ir<%ptr>, vp<[[VP5]]>
+; SCALAR-NEXT: CLONE ir<%lv>.1 = load ir<%gep>.1
+; SCALAR-NEXT: Successor(s): pred.load.continue
; SCALAR-EMPTY:
-; SCALAR-NEXT: <xVFxUF> pred.load: {
-; SCALAR-NEXT: pred.load.entry:
-; SCALAR-NEXT: BRANCH-ON-MASK ir<%c>
-; SCALAR-NEXT: Successor(s): pred.load.if, pred.load.continue
-; SCALAR-EMPTY:
-; SCALAR-NEXT: pred.load.if:
-; SCALAR-NEXT: CLONE ir<%gep>.1 = getelementptr ir<%ptr>, vp<[[VP5]]>
-; SCALAR-NEXT: CLONE ir<%lv>.1 = load ir<%gep>.1
-; SCALAR-NEXT: Successor(s): pred.load.continue
-; SCALAR-EMPTY:
-; SCALAR-NEXT: pred.load.continue:
-; SCALAR-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[VP7:%[0-9]+]]> = ir<%lv>.1
-; SCALAR-NEXT: No successors
-; SCALAR-NEXT: }
-; SCALAR-NEXT: Successor(s): if.then.0
-; SCALAR-EMPTY:
-; SCALAR-NEXT: if.then.0:
-; SCALAR-NEXT: BLEND ir<%pred.val> = ir<0> vp<%6>/ir<%c>
-; SCALAR-NEXT: BLEND ir<%pred.val>.1 = ir<0> vp<%7>/ir<%c>
+; SCALAR-NEXT: pred.load.continue:
+; SCALAR-NEXT: EMIT-SCALAR vp<[[VP9:%[0-9]+]]> = phi [ ir<poison>, pred.load.continue ], [ ir<%lv>.1, pred.load.if ]
+; SCALAR-NEXT: BLEND ir<%pred.val> = ir<0> vp<%7>/ir<%c>
+; SCALAR-NEXT: BLEND ir<%pred.val>.1 = ir<0> vp<%9>/ir<%c>
; SCALAR-NEXT: CLONE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[VP4]]>
; SCALAR-NEXT: CLONE ir<%gep.dst>.1 = getelementptr ir<%dst>, vp<[[VP5]]>
; SCALAR-NEXT: CLONE store ir<%pred.val>, ir<%gep.dst>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
index a641e7ef54de7..0a17319c04d35 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/interleave-and-scalarize-only.ll
@@ -136,9 +136,8 @@ define void @test_scalarize_with_branch_cond(ptr %src, ptr %dst) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.store.continue4 ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i1
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i1 false, [[TMP0]]
-; CHECK-NEXT: [[INDUCTION2:%.*]] = add i1 [[OFFSET_IDX]], false
; CHECK-NEXT: [[INDUCTION3:%.*]] = add i1 [[OFFSET_IDX]], true
-; CHECK-NEXT: br i1 [[INDUCTION2]], label %pred.store.if, label %pred.store.continue
+; CHECK-NEXT: br i1 [[OFFSET_IDX]], label %pred.store.if, label %pred.store.continue
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr %src, i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index e93b84c2f5603..e5069d97f3117 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -273,15 +273,14 @@ define void @cast_induction_tail_folding(ptr %A) {
; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
; IC2: [[VECTOR_BODY]]:
; IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
-; IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; IC2-NEXT: [[INDEX1:%.*]] = add i32 [[INDEX]], 1
-; IC2-NEXT: [[TMP2:%.*]] = icmp ule i32 [[TMP0]], 2
+; IC2-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDEX]], 2
; IC2-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDEX1]], 2
; IC2-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; IC2: [[PRED_STORE_IF]]:
-; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[TMP0]] to i64
+; IC2-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64
; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; IC2-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4
+; IC2-NEXT: store i32 [[INDEX]], ptr [[TMP5]], align 4
; IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
; IC2: [[PRED_STORE_CONTINUE]]:
; IC2-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index 610da05dbbab7..c07134229322e 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -118,17 +118,16 @@ define void @pr45679(ptr %A) {
; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
; VF1UF4: vector.body:
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; VF1UF4-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; VF1UF4-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; VF1UF4-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; VF1UF4-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13
+; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[INDEX]], 13
; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13
; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13
; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13
; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF1UF4: pred.store.if:
-; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
+; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
; VF1UF4-NEXT: store i32 13, ptr [[TMP8]], align 1
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
; VF1UF4: pred.store.continue:
@@ -289,17 +288,16 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
; VF1UF4: vector.body:
; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; VF1UF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF1UF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; VF1UF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; VF1UF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13
+; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 13
; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13
; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13
; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13
; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF1UF4: pred.store.if:
-; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
+; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; VF1UF4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
; VF1UF4-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index c6bcd708c0fce..57e0e9b63b8de 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -17,17 +17,16 @@ define void @VF1-VPlanExe(ptr %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 14
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 14
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 14
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 14
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
More information about the llvm-commits
mailing list