[llvm] e9e3a18 - [LV] Don't cost branches and conditions to empty blocks.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 18 04:51:25 PDT 2024
Author: Florian Hahn
Date: 2024-08-18T12:51:17+01:00
New Revision: e9e3a183d6d8d8729223b7131aa57fd9f02f0440
URL: https://github.com/llvm/llvm-project/commit/e9e3a183d6d8d8729223b7131aa57fd9f02f0440
DIFF: https://github.com/llvm/llvm-project/commit/e9e3a183d6d8d8729223b7131aa57fd9f02f0440.diff
LOG: [LV] Don't cost branches and conditions to empty blocks.
Update the legacy cost model to skip branches with successor blocks
that are empty or only contain dead instructions, together with their
conditions. Such branches and conditions won't result in any
generated code and will be cleaned up by VPlan transforms.
This fixes a difference between the legacy and VPlan-based cost model.
When running LV in its usual pipeline position, such dead blocks should
already have been cleaned up, but they might be generated manually or by
fuzzers.
Fixes https://github.com/llvm/llvm-project/issues/100591.
Added:
llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 27e99694aafd30..55c0ba3dd8f9bc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6730,9 +6730,12 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
return RequiresScalarEpilogue &&
!TheLoop->contains(cast<Instruction>(U)->getParent());
};
+
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
MapVector<Value *, SmallVector<Value *>> DeadInvariantStoreOps;
- for (BasicBlock *BB : TheLoop->blocks())
- for (Instruction &I : *BB) {
+ for (BasicBlock *BB : reverse(make_range(DFS.beginRPO(), DFS.endRPO())))
+ for (Instruction &I : reverse(*BB)) {
// Find all stores to invariant variables. Since they are going to sink
// outside the loop we do not need calculate cost for them.
StoreInst *SI;
@@ -6765,6 +6768,13 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
Value *PointerOp = getLoadStorePointerOperand(&I);
DeadInterleavePointerOps.push_back(PointerOp);
}
+
+ // Queue branches for analysis. They are dead, if their successors only
+ // contain dead instructions.
+ if (auto *Br = dyn_cast<BranchInst>(&I)) {
+ if (Br->isConditional())
+ DeadOps.push_back(&I);
+ }
}
// Mark ops feeding interleave group members as free, if they are only used
@@ -6789,8 +6799,36 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
// Mark ops that would be trivially dead and are only used by ignored
// instructions as free.
BasicBlock *Header = TheLoop->getHeader();
+
+ // Returns true if the block contains only dead instructions. Such blocks will
+ // be removed by VPlan-to-VPlan transforms and won't be considered by the
+ // VPlan-based cost model, so skip them in the legacy cost-model as well.
+ auto IsEmptyBlock = [this](BasicBlock *BB) {
+ return all_of(*BB, [this](Instruction &I) {
+ return ValuesToIgnore.contains(&I) || VecValuesToIgnore.contains(&I) ||
+ (isa<BranchInst>(&I) && !cast<BranchInst>(&I)->isConditional());
+ });
+ };
for (unsigned I = 0; I != DeadOps.size(); ++I) {
auto *Op = dyn_cast<Instruction>(DeadOps[I]);
+
+ // Check if the branch should be considered dead.
+ if (auto *Br = dyn_cast_or_null<BranchInst>(Op)) {
+ BasicBlock *ThenBB = Br->getSuccessor(0);
+ BasicBlock *ElseBB = Br->getSuccessor(1);
+ bool ThenEmpty = IsEmptyBlock(ThenBB);
+ bool ElseEmpty = IsEmptyBlock(ElseBB);
+ if ((ThenEmpty && ElseEmpty) ||
+ (ThenEmpty && ThenBB->getSingleSuccessor() == ElseBB &&
+ ElseBB->phis().empty()) ||
+ (ElseEmpty && ElseBB->getSingleSuccessor() == ThenBB &&
+ ThenBB->phis().empty())) {
+ VecValuesToIgnore.insert(Br);
+ DeadOps.push_back(Br->getCondition());
+ }
+ continue;
+ }
+
// Skip any op that shouldn't be considered dead.
if (!Op || !TheLoop->contains(Op) ||
(isa<PHINode>(Op) && Op->getParent() == Header) ||
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
new file mode 100644
index 00000000000000..d970b427d035da
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll
@@ -0,0 +1,850 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
+; CHECK-LABEL: define void @block_with_dead_inst_1(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -3
+; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64 [[TMP6]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP8]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP15]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
+; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
+; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i16 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[DEAD_GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[TMP25]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %cond.end7, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %xor1315 = phi i16 [ 1, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i16 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %then, label %loop.latch
+
+then:
+ %dead.gep = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+loop.latch:
+ store i16 %xor, ptr %gep
+ %iv.next = add nsw i64 %iv, 3
+ %1 = icmp eq i64 %iv.next, %N
+ br i1 %1, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @block_with_dead_inst_2(ptr %src) #0 {
+; CHECK-LABEL: define void @block_with_dead_inst_2(
+; CHECK-SAME: ptr [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 333, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 333, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 333, [[TMP5]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP12]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
+; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
+; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i16 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[DEAD_GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %cond.end7, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %xor1315 = phi i16 [ 0, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i16 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %loop.latch, label %else
+
+else:
+ %dead.gep = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+loop.latch:
+ store i16 %xor, ptr %gep
+ %iv.next = add nsw i64 %iv, 3
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
+; CHECK-LABEL: define void @multiple_blocks_with_dead_insts_3(
+; CHECK-SAME: ptr [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 333, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 333, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 333, [[TMP5]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP12]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
+; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 0, i32 [[TMP16]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
+; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i16 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %cond.end7, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %xor1315 = phi i16 [ 0, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i16 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %then, label %else
+
+then:
+ %dead.gep.1 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+else:
+ %dead.gep.2 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+loop.latch:
+ store i16 %xor, ptr %gep
+ %iv.next = add nsw i64 %iv, 3
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
+; CHECK-LABEL: define void @multiple_blocks_with_dead_insts_4(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -3
+; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64 [[TMP6]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP8]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP15]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
+; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
+; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i16 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: br label %[[THEN_1:.*]]
+; CHECK: [[THEN_1]]:
+; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %cond.end7, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %xor1315 = phi i16 [ 1, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i16 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %then, label %else
+
+then:
+ br label %then.1
+
+then.1:
+ %dead.gep.1 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+else:
+ %dead.gep.2 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+loop.latch:
+ store i16 %xor, ptr %gep
+ %iv.next = add nsw i64 %iv, 3
+ %ec = icmp eq i64 %iv.next, %N
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
+; CHECK-LABEL: define void @multiple_blocks_with_dead_inst_multiple_successors_5(
+; CHECK-SAME: ptr [[SRC:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 333, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 333, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 333, [[TMP5]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP9]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 4
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 3, [[TMP12]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 4
+; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 1, i32 [[TMP16]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> zeroinitializer, <vscale x 4 x ptr> [[TMP17]], i32 2, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], 4
+; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[TMP20]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> zeroinitializer, i32 [[TMP21]]
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i16 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: br label %[[THEN_1:.*]]
+; CHECK: [[THEN_1]]:
+; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[ELSE_2:.*]]
+; CHECK: [[ELSE_2]]:
+; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %cond.end7, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %xor1315 = phi i16 [ 1, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i16 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %then, label %else
+
+then:
+ br label %then.1
+
+then.1:
+ %dead.gep.1 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+else:
+ br label %else.2
+
+else.2:
+ %dead.gep.2 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+loop.latch:
+ store i16 %xor, ptr %gep
+ %iv.next = add nsw i64 %iv, 3
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; The conditionally executed blocks %then.1 and %else.2 each contain only a GEP
+; without users, and %then branches on the loop-invariant argument %ic to
+; either %then.1 or %else. The expected vector body below contains no compare
+; or select derived from %c or %ic: only the store from %loop.latch is widened,
+; as a scatter with an all-true mask whose addresses come from a vector IV with
+; step 3.
+define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %ic, i64 %N) #0 {
+; CHECK-LABEL: define void @multiple_blocks_with_dead_inst_multiple_successors_6(
+; CHECK-SAME: ptr [[SRC:%.*]], i1 [[IC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -3
+; CHECK-NEXT: [[TMP1:%.*]] = udiv i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64 [[TMP6]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP8]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 3
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
+; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = mul <vscale x 8 x i64> [[TMP12]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = mul i64 3, [[TMP15]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP16]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 8
+; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 1, i32 [[TMP19]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP20]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 8
+; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i16> zeroinitializer, i32 [[TMP24]]
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i16 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: br i1 [[IC]], label %[[THEN_1:.*]], label %[[ELSE]]
+; CHECK: [[THEN_1]]:
+; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[ELSE_2:.*]]
+; CHECK: [[ELSE_2]]:
+; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ ; Carries the previous iteration's %xor; this phi has no users.
+ %xor1315 = phi i16 [ 1, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i16 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %then, label %else
+
+then:
+ ; Second conditional branch, on the invariant %ic; its false edge joins %else.
+ br i1 %ic, label %then.1, label %else
+
+then.1:
+ ; %dead.gep.1 has no users.
+ %dead.gep.1 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+else:
+ br label %else.2
+
+else.2:
+ ; %dead.gep.2 has no users.
+ %dead.gep.2 = getelementptr i64, ptr %src, i64 %iv
+ br label %loop.latch
+
+loop.latch:
+ ; The only store in the loop; the IV advances by 3 and the loop exits once
+ ; %iv.next equals %N.
+ store i16 %xor, ptr %gep
+ %iv.next = add nsw i64 %iv, 3
+ %ec = icmp eq i64 %iv.next, %N
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; %then is empty apart from its branch, but %loop.latch has a phi (%p) that
+; takes 99 when control arrives from %then and %l when it arrives directly from
+; %loop.header, so the condition %c still affects the stored value. The expected
+; vector body keeps the compare and turns the phi into a select that feeds the
+; widened store.
+define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 {
+; CHECK-LABEL: define void @empty_block_with_phi_1(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 1, i32 [[TMP8]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP11]], align 2
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 8 x i16> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP12]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 99, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i16> [[WIDE_LOAD]]
+; CHECK-NEXT: store <vscale x 8 x i16> [[PREDPHI]], ptr [[TMP11]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP16]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i32 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[P:%.*]] = phi i16 [ [[L]], %[[LOOP_HEADER]] ], [ 99, %[[THEN]] ]
+; CHECK-NEXT: store i16 [[P]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ ; Carries the previous iteration's %xor; this phi has no users.
+ %xor1315 = phi i32 [ 1, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i32 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %then, label %loop.latch
+
+then:
+ ; Empty block: it only provides the incoming edge for the value 99 in %p.
+ br label %loop.latch
+
+loop.latch:
+ ; %p is 99 when %c was true (via %then), otherwise the loaded value %l.
+ %p = phi i16 [ %l, %loop.header ], [ 99, %then ]
+ store i16 %p, ptr %gep
+ %iv.next = add nsw i64 %iv, 1
+ %1 = icmp eq i64 %iv.next, %N
+ br i1 %1, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; Variant of @empty_block_with_phi_1 in which the empty block (%else) sits on
+; the false edge of the branch on %c. The phi %p in %loop.latch takes 99 when
+; control arrives from %else and %l when it arrives directly from %loop.header.
+; The expected vector body negates the compare (xor with all-true) and uses the
+; result as the select condition feeding the widened store.
+define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 {
+; CHECK-LABEL: define void @empty_block_with_phi_2(
+; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 8
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 1, i32 [[TMP8]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 8 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ zeroinitializer, %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP11]], align 2
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 8 x i16> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = xor <vscale x 8 x i1> [[TMP12]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 99, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i16> [[WIDE_LOAD]]
+; CHECK-NEXT: store <vscale x 8 x i16> [[PREDPHI]], ptr [[TMP11]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 8
+; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], 1
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 8 x i32> zeroinitializer, i32 [[TMP17]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[XOR1315:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ]
+; CHECK-NEXT: [[XOR]] = xor i32 0, 0
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[P:%.*]] = phi i16 [ [[L]], %[[LOOP_HEADER]] ], [ 99, %[[ELSE]] ]
+; CHECK-NEXT: store i16 [[P]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[TMP18]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ ; Carries the previous iteration's %xor; this phi has no users.
+ %xor1315 = phi i32 [ 1, %entry ], [ %xor, %loop.latch ]
+ %xor = xor i32 0, 0
+ %gep = getelementptr i16, ptr %src, i64 %iv
+ %l = load i16, ptr %gep, align 2
+ %c = icmp eq i16 %l, 0
+ br i1 %c, label %loop.latch, label %else
+
+else:
+ ; Empty block: it only provides the incoming edge for the value 99 in %p.
+ br label %loop.latch
+
+loop.latch:
+ ; %p is 99 when %c was false (via %else), otherwise the loaded value %l.
+ %p = phi i16 [ %l, %loop.header ], [ 99, %else ]
+ store i16 %p, ptr %gep
+ %iv.next = add nsw i64 %iv, 1
+ %1 = icmp eq i64 %iv.next, %N
+ br i1 %1, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+; Attribute group #0, attached to the test functions via `#0`: sets the
+; "target-features" string so that the 64bit and v features are enabled.
+attributes #0 = { "target-features"="+64bit,+v" }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
+; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
+; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list