[llvm] [LV] Optimise users of induction variables in early exit blocks (PR #130766)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 11 05:53:34 PDT 2025
https://github.com/david-arm created https://github.com/llvm/llvm-project/pull/130766
This is the second of two PRs that attempts to improve the IR
generated in the exit blocks of vectorised loops with uncountable
early exits. It follows on from PR #128880. In this PR I am
improving the generated code for users of induction variables in
early exit blocks.
This required adding a new VPInstruction called FirstActiveLane,
which calculates the index of the first active predicate in the
mask operand.
I have added a new function optimizeEarlyExitInductionUser that
is called from optimizeInductionExitUsers when handling users in
early exit blocks.
>From 235ceacb928004dbdfbd4f876cb8867808f59618 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 11 Mar 2025 12:33:30 +0000
Subject: [PATCH 1/2] Add tests
---
.../single_early_exit_live_outs.ll | 613 ++++++++++++++++--
1 file changed, 573 insertions(+), 40 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 9cb4b6f9d8680..7d4b5920a8fe9 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -86,6 +86,443 @@ loop.end:
}
+define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() {
+; CHECK-LABEL: define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 9, i32 11, i32 13, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i32> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ 137, [[MIDDLE_BLOCK]] ], [ 9, [[ENTRY]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[INDEX2_NEXT]] = add i32 [[INDEX2]], 2
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[INDEX2]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %index2 = phi i32 [ %index2.next, %loop.inc ], [ 9, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %index2.next = add i32 %index2, 2
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i32 [ %index2, %loop ], [ 67, %loop.inc ]
+ ret i32 %retval
+}
+
+
+define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() {
+; CHECK-LABEL: define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P1]], i64 64
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i128 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 9, i32 11, i32 13, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i128 [[INDEX1]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i128 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i128 [[INDEX_NEXT2]], 64
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i32> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ 137, [[MIDDLE_BLOCK]] ], [ 9, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[P1]], [[ENTRY]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i128 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i128 [[INDEX]], 1
+; CHECK-NEXT: [[INDEX2_NEXT]] = add i32 [[INDEX2]], 2
+; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i128 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[INDEX2]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i128 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %index2 = phi i32 [ %index2.next, %loop.inc ], [ 9, %entry ]
+ %ptr = phi ptr [ %ptr.next, %loop.inc ], [ %p1, %entry ]
+ %ld1 = load i8, ptr %ptr, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i128 %index, 1
+ %index2.next = add i32 %index2, 2
+ %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 1
+ %exitcond = icmp ne i128 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i32 [ %index2, %loop ], [ 67, %loop.inc ]
+ ret i32 %retval
+}
+
+
+define float @same_exit_block_pre_inc_use1_iv64_endf32() {
+; CHECK-LABEL: define float @same_exit_block_pre_inc_use1_iv64_endf32() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00)
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x float> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ 7.300000e+01, [[MIDDLE_BLOCK]] ], [ 9.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[INDEX2:%.*]] = phi float [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[INDEX2_NEXT]] = fadd fast float [[INDEX2]], 1.000000e+00
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi float [ [[INDEX2]], [[LOOP]] ], [ 1.230000e+02, [[LOOP_INC]] ], [ 1.230000e+02, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: ret float [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %index2 = phi float [ %index2.next, %loop.inc ], [ 9.0e+00, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %index2.next = fadd fast float %index2, 1.0e+00
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi float [ %index2, %loop ], [ 1.23e+02, %loop.inc ]
+ ret float %retval
+}
+
+
+define ptr @same_exit_block_pre_inc_use1_iv64_endptr() {
+; CHECK-LABEL: define ptr @same_exit_block_pre_inc_use1_iv64_endptr() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P2]], i64 320
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX1]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX2]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX2]], 5
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX2]], 10
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX2]], 15
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP2]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP3]]
+; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP4]]
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x ptr> [[TMP7]], ptr [[NEXT_GEP4]], i32 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[NEXT_GEP5]], i32 3
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD6]]
+; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
+; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]])
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 64
+; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP16]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true)
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x ptr> [[TMP9]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[P2]], [[ENTRY]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[INDEX2:%.*]] = phi ptr [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[INDEX2_NEXT]] = getelementptr i8, ptr [[INDEX2]], i64 5
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[INDEX2]], [[LOOP]] ], [ [[P1]], [[LOOP_INC]] ], [ [[P1]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: ret ptr [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ %p2 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %index2 = phi ptr [ %index2.next, %loop.inc ], [ %p2, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %index2.next = getelementptr i8, ptr %index2, i64 5
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi ptr [ %index2, %loop ], [ %p1, %loop.inc ]
+ ret ptr %retval
+}
+
+
+define ptr @same_exit_block_pre_inc_use1_ivptr() {
+; CHECK-LABEL: define ptr @same_exit_block_pre_inc_use1_ivptr() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: [[PTREND:%.*]] = getelementptr i8, ptr [[P1]], i64 1024
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P1]], i64 1024
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
+; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP2]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP2]], i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x ptr> [[TMP7]], ptr [[NEXT_GEP3]], i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 72)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP12]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true)
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x ptr> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[P1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[PTR_NEXT]], [[PTREND]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTR]], [[LOOP]] ], [ [[PTREND]], [[LOOP_INC]] ], [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: ret ptr [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ %ptrend = getelementptr i8, ptr %p1, i64 1024
+ br label %loop
+
+loop:
+ %ptr = phi ptr [ %ptr.next, %loop.inc ], [ %p1, %entry ]
+ %ld1 = load i8, ptr %ptr, align 1
+ %cmp3 = icmp eq i8 %ld1, 72
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 1
+ %exitcond = icmp ne ptr %ptr.next, %ptrend
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi ptr [ %ptr, %loop ], [ %ptrend, %loop.inc ]
+ ret ptr %retval
+}
+
+
define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) {
; CHECK-LABEL: define i64 @same_exit_block_pre_inc1_use_inv_cond(
; CHECK-SAME: i1 [[COND:%.*]]) {
@@ -116,7 +553,7 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) {
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -140,7 +577,7 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -201,7 +638,7 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -224,7 +661,7 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -284,7 +721,7 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -307,7 +744,7 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -365,7 +802,7 @@ define i64 @same_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -386,7 +823,7 @@ define i64 @same_exit_block_pre_inc_use2() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -446,7 +883,7 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -469,7 +906,7 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
@@ -527,7 +964,7 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -548,7 +985,7 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -606,7 +1043,7 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -629,7 +1066,7 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -661,6 +1098,90 @@ loop.end:
}
+define ptr @same_exit_block_post_inc_use1_ivptr() {
+; CHECK-LABEL: define ptr @same_exit_block_post_inc_use1_ivptr() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: [[PTREND:%.*]] = getelementptr i8, ptr [[P1]], i64 1024
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P1]], i64 1024
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
+; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP2]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP1]], i64 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 1
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x ptr> [[TMP10]], ptr [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x ptr> [[TMP11]], ptr [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr> [[TMP12]], ptr [[TMP9]], i32 3
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 72)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
+; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]])
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP16]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true)
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x ptr> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[P1]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[PTR_NEXT]], [[PTREND]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTR_NEXT]], [[LOOP]] ], [ [[PTREND]], [[LOOP_INC]] ], [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: ret ptr [[RETVAL]]
+;
+entry:
+ %p1 = alloca [1024 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ %ptrend = getelementptr i8, ptr %p1, i64 1024
+ br label %loop
+
+loop:
+ %ptr = phi ptr [ %ptr.next, %loop.inc ], [ %p1, %entry ]
+ %ld1 = load i8, ptr %ptr, align 1
+ %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 1
+ %cmp3 = icmp eq i8 %ld1, 72
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %exitcond = icmp ne ptr %ptr.next, %ptrend
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi ptr [ %ptr.next, %loop ], [ %ptrend, %loop.inc ]
+ ret ptr %retval
+}
+
+
define i64 @same_exit_block_post_inc_use2() {
; CHECK-LABEL: define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: entry:
@@ -698,7 +1219,7 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -721,7 +1242,7 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
; CHECK: loop.inc:
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -781,7 +1302,7 @@ define i64 @diff_exit_block_pre_inc_use1() {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -804,7 +1325,7 @@ define i64 @diff_exit_block_pre_inc_use1() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK: loop.early.exit:
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
@@ -869,7 +1390,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -890,7 +1411,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP35:![0-9]+]]
; CHECK: loop.early.exit:
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
@@ -957,7 +1478,7 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -980,7 +1501,7 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP37:![0-9]+]]
; CHECK: loop.early.exit:
; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
@@ -1045,7 +1566,7 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -1068,7 +1589,7 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: loop.early.exit:
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
@@ -1144,14 +1665,14 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[TMP11]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP11]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1167,9 +1688,9 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]]
; CHECK: loop.inc:
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP41:![0-9]+]]
; CHECK: loop.early.exit:
-; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[TMP21]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
@@ -1246,7 +1767,7 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) {
; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP19]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64
; CHECK-NEXT: [[TMP22:%.*]] = or i1 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP20]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -1273,7 +1794,7 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) {
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]]
; CHECK: loop.inc:
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP31:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: loop.early.exit:
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX2_NEXT]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
@@ -1340,7 +1861,7 @@ define i64 @loop_contains_safe_call() {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -1362,7 +1883,7 @@ define i64 @loop_contains_safe_call() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP33:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP45:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -1419,7 +1940,7 @@ define i64 @loop_contains_safe_div() {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -1441,7 +1962,7 @@ define i64 @loop_contains_safe_div() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP35:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP47:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -1499,7 +2020,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_LOAD2]], i32 3
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
@@ -1523,7 +2044,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP37:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP49:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -1585,7 +2106,7 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP9]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -1608,7 +2129,7 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]], !llvm.loop [[LOOP39:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ], [ 1024, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -1665,7 +2186,7 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
@@ -1688,7 +2209,7 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP53:![0-9]+]]
; CHECK: loop.end:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
@@ -1763,4 +2284,16 @@ attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" }
; CHECK: [[LOOP39]] = distinct !{[[LOOP39]], [[META2]], [[META1]]}
; CHECK: [[LOOP40]] = distinct !{[[LOOP40]], [[META1]], [[META2]]}
; CHECK: [[LOOP41]] = distinct !{[[LOOP41]], [[META2]], [[META1]]}
+; CHECK: [[LOOP42]] = distinct !{[[LOOP42]], [[META1]], [[META2]]}
+; CHECK: [[LOOP43]] = distinct !{[[LOOP43]], [[META2]], [[META1]]}
+; CHECK: [[LOOP44]] = distinct !{[[LOOP44]], [[META1]], [[META2]]}
+; CHECK: [[LOOP45]] = distinct !{[[LOOP45]], [[META2]], [[META1]]}
+; CHECK: [[LOOP46]] = distinct !{[[LOOP46]], [[META1]], [[META2]]}
+; CHECK: [[LOOP47]] = distinct !{[[LOOP47]], [[META2]], [[META1]]}
+; CHECK: [[LOOP48]] = distinct !{[[LOOP48]], [[META1]], [[META2]]}
+; CHECK: [[LOOP49]] = distinct !{[[LOOP49]], [[META2]], [[META1]]}
+; CHECK: [[LOOP50]] = distinct !{[[LOOP50]], [[META1]], [[META2]]}
+; CHECK: [[LOOP51]] = distinct !{[[LOOP51]], [[META2]], [[META1]]}
+; CHECK: [[LOOP52]] = distinct !{[[LOOP52]], [[META1]], [[META2]]}
+; CHECK: [[LOOP53]] = distinct !{[[LOOP53]], [[META2]], [[META1]]}
;.
>From 2c513e7bc1ce0042667683a8fde619a29c550ef9 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 11 Mar 2025 12:34:05 +0000
Subject: [PATCH 2/2] [LV] Optimise users of induction variables in early exit
blocks
This is the second of two PRs that attempts to improve the IR
generated in the exit blocks of vectorised loops with uncountable
early exits. It follows on from PR #128880. In this PR I am
improving the generated code for users of induction variables in
early exit blocks.
This required adding a new VPInstruction called FirstActiveLane,
which calculates the index of the first active predicate in the
mask operand.
I have added a new function optimizeEarlyExitInductionUser that
is called from optimizeInductionExitUsers when handling with
users in early exit blocks.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 2 +
.../Transforms/Vectorize/VPlanAnalysis.cpp | 2 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 9 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 71 ++++++-
.../AArch64/simple_early_exit.ll | 35 +---
.../single_early_exit_live_outs.ll | 190 ++++++------------
6 files changed, 149 insertions(+), 160 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 43dc30c40bb53..979123d1ab0af 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,6 +882,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
// Extracts the first active lane of a vector, where the first operand is
// the predicate, and the second operand is the vector to extract.
ExtractFirstActive,
+ // Calculates the first active lane index of the vector predicate operand.
+ FirstActiveLane,
};
private:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 6f6875f0e5e0e..2aea5f6286668 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -85,6 +85,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return VecTy->getElementType();
return BaseTy;
}
+ case VPInstruction::FirstActiveLane:
+ return IntegerType::get(Ctx, 64);
case VPInstruction::LogicalAnd:
assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a8b304271f0da..2f11ce7e7fe71 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -712,6 +712,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.getInt64Ty(), Mask, true, "first.active.lane");
return Builder.CreateExtractElement(Vec, Ctz, "early.exit.value");
}
+ case VPInstruction::FirstActiveLane: {
+ Value *Mask = State.get(getOperand(0));
+ return Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask,
+ true, "first.active.lane");
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -754,6 +759,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractFromEnd ||
getOpcode() == VPInstruction::ExtractFirstActive ||
+ getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
@@ -946,6 +952,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFirstActive:
O << "extract-first-active";
break;
+ case VPInstruction::FirstActiveLane:
+ O << "first-active-lane";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3b44e95a2471c..ecb5b64461a64 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -737,6 +737,64 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
return IsWideIVInc() ? WideIV : nullptr;
}
+/// Attempts to optimize the induction variable exit values for users in the
+/// early exit block.
+static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
+ VPTypeAnalysis &TypeInfo,
+ VPBlockBase *PredVPBB,
+ VPValue *Op) {
+ using namespace VPlanPatternMatch;
+
+ VPValue *Incoming, *Mask;
+ if (!match(Op, m_VPInstruction<VPInstruction::ExtractFirstActive>(
+ m_VPValue(Incoming), m_VPValue(Mask))))
+ return nullptr;
+
+ auto *WideIV = getOptimizableIVOf(Incoming);
+ if (!WideIV)
+ return nullptr;
+
+ auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
+ if (WideIntOrFp && WideIntOrFp->getTruncInst())
+ return nullptr;
+
+ // Calculate the final index.
+ VPValue *EndValue = Plan.getCanonicalIV();
+ auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+ VPBuilder B(cast<VPBasicBlock>(PredVPBB));
+
+ DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
+ VPValue *FirstActiveLane =
+ B.createNaryOp(VPInstruction::FirstActiveLane, Mask, DL);
+ if (CanonicalIVType != TypeInfo.inferScalarType(FirstActiveLane)) {
+ Instruction::CastOps CastOp = CanonicalIVType->getScalarSizeInBits() < 64
+ ? Instruction::Trunc
+ : Instruction::ZExt;
+ FirstActiveLane =
+ B.createScalarCast(CastOp, FirstActiveLane, CanonicalIVType, DL);
+ }
+ EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
+
+ // `getOptimizableIVOf()` always returns the pre-incremented IV, so if it
+ // changed it means the exit is using the incremented value, so we need to
+ // add the step.
+ if (Incoming != WideIV) {
+ VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(CanonicalIVType, 1));
+ EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
+ }
+
+ if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
+ const InductionDescriptor &ID = WideIV->getInductionDescriptor();
+ VPValue *Start = WideIV->getStartValue();
+ VPValue *Step = WideIV->getStepValue();
+ EndValue = B.createDerivedIV(
+ ID.getKind(), dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()),
+ Start, EndValue, Step);
+ }
+
+ return EndValue;
+}
+
/// Attempts to optimize the induction variable exit values for users in the
/// exit block coming from the latch in the original scalar loop.
static VPValue *
@@ -799,12 +857,15 @@ void VPlanTransforms::optimizeInductionExitUsers(
break;
for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
+ VPValue *Escape = nullptr;
if (PredVPBB == MiddleVPBB)
- if (VPValue *Escape = optimizeLatchExitInductionUser(
- Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx),
- EndValues))
- ExitIRI->setOperand(Idx, Escape);
- // TODO: Optimize early exit induction users in follow-on patch.
+ Escape = optimizeLatchExitInductionUser(
+ Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), EndValues);
+ else
+ Escape = optimizeEarlyExitInductionUser(Plan, TypeInfo, PredVPBB,
+ ExitIRI->getOperand(Idx));
+ if (Escape)
+ ExitIRI->setOperand(Idx, Escape);
}
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 5f926db1131f6..02a16234b86cb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -25,16 +25,9 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
; CHECK-NEXT: [[TMP6:%.*]] = add i64 3, [[N_VEC]]
-; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 16 x i64> [[TMP7]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> splat (i64 3), [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP9]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP10]]
@@ -48,7 +41,6 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[TMP15]], splat (i1 true)
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
@@ -58,7 +50,8 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP16]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP20]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -140,7 +133,8 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <2 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -197,7 +191,6 @@ define i64 @loop_contains_safe_call() #1 {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[TMP0]]
@@ -209,7 +202,6 @@ define i64 @loop_contains_safe_call() #1 {
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.split:
@@ -218,7 +210,8 @@ define i64 @loop_contains_safe_call() #1 {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -283,16 +276,9 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
-; CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 4 x i64> [[TMP16]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 3), [[TMP17]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 3, [[INDEX2]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX1]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
@@ -304,7 +290,6 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: [[TMP15:%.*]] = xor <vscale x 4 x i1> [[TMP14]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.split:
@@ -314,7 +299,8 @@ define i64 @loop_contains_safe_div() #1 {
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP15]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP16]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[OFFSET_IDX]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -375,7 +361,6 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
@@ -389,7 +374,6 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.split:
@@ -399,7 +383,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 7d4b5920a8fe9..82b908eab4871 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -15,7 +15,6 @@ define i64 @same_exit_block_pre_inc_use1() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -29,7 +28,6 @@ define i64 @same_exit_block_pre_inc_use1() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
@@ -38,7 +36,8 @@ define i64 @same_exit_block_pre_inc_use1() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -98,7 +97,6 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 9, i32 11, i32 13, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -112,7 +110,6 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.split:
@@ -121,7 +118,10 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i32> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[DOTCAST]], 2
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i32 9, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -185,7 +185,6 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i128 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 9, i32 11, i32 13, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i128 [[INDEX1]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
@@ -196,7 +195,6 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() {
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i128 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.split:
@@ -205,7 +203,11 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i32> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i64 [[FIRST_ACTIVE_LANE]] to i128
+; CHECK-NEXT: [[TMP9:%.*]] = add i128 [[INDEX1]], [[TMP8]]
+; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i128 [[TMP9]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[DOTCAST]], 2
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i32 9, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i128 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -267,7 +269,6 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 9.000000e+00, float 1.000000e+01, float 1.100000e+01, float 1.200000e+01>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -281,7 +282,6 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.split:
@@ -290,7 +290,10 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x float> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[TMP10]] to float
+; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = fadd fast float 9.000000e+00, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -358,19 +361,6 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX1]], 5
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX2]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX2]], 5
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX2]], 10
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX2]], 15
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP2]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP3]]
-; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP4]]
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x ptr> [[TMP7]], ptr [[NEXT_GEP4]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[NEXT_GEP5]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
@@ -390,7 +380,9 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x ptr> [[TMP9]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 5
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP20]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -456,17 +448,7 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
-; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP2]]
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP3]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[NEXT_GEP2]], i32 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x ptr> [[TMP7]], ptr [[NEXT_GEP3]], i32 3
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 72)
@@ -482,7 +464,8 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP11]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x ptr> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[P1]], [[ENTRY:%.*]] ]
@@ -536,7 +519,6 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -551,7 +533,6 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) {
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.split:
@@ -560,7 +541,8 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -622,7 +604,6 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P1]], i64 0, i64 [[TMP0]]
@@ -636,7 +617,6 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.split:
@@ -645,7 +625,8 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -705,7 +686,6 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -719,7 +699,6 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.split:
@@ -728,7 +707,8 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -867,7 +847,6 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -881,7 +860,6 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.split:
@@ -890,7 +868,8 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -971,7 +950,8 @@ define i64 @same_exit_block_pre_inc_use4() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1027,7 +1007,6 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -1041,7 +1020,6 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.split:
@@ -1050,7 +1028,8 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1111,23 +1090,9 @@ define ptr @same_exit_block_post_inc_use1_ivptr() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP1]]
-; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP2]]
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP3]]
-; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP1]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 1
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x ptr> [[TMP10]], ptr [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x ptr> [[TMP11]], ptr [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr> [[TMP12]], ptr [[TMP9]], i32 3
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 72)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
@@ -1141,7 +1106,9 @@ define ptr @same_exit_block_post_inc_use1_ivptr() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP15]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x ptr> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP9]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[P1]], [[ENTRY:%.*]] ]
@@ -1196,23 +1163,12 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> poison, i64 [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP9]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i64> [[TMP13]], i64 [[TMP10]], i32 2
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 3
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true)
@@ -1226,7 +1182,9 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[TMP15]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1286,7 +1244,6 @@ define i64 @diff_exit_block_pre_inc_use1() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -1300,7 +1257,6 @@ define i64 @diff_exit_block_pre_inc_use1() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.split:
@@ -1309,7 +1265,8 @@ define i64 @diff_exit_block_pre_inc_use1() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1462,7 +1419,6 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -1476,7 +1432,6 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; CHECK: middle.split:
@@ -1485,7 +1440,8 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1550,7 +1506,6 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -1564,7 +1519,6 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP14]], [[TMP15]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; CHECK: middle.split:
@@ -1573,7 +1527,8 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1642,17 +1597,6 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP3]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> [[TMP8]], i64 [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP6]], i32 2
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP7]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
@@ -1672,7 +1616,9 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true)
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP11]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = add i64 3, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1742,19 +1688,6 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = add i64 [[START]], [[INDEX1]]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX2]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX2]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX2]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX2]], 3
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP3]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP4]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP5]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP7]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP9]], i32 3
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP15]], align 1
@@ -1775,7 +1708,9 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP19]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 [[START]], [[TMP12]]
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1847,7 +1782,6 @@ define i64 @loop_contains_safe_call() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[TMP0]]
@@ -1859,7 +1793,6 @@ define i64 @loop_contains_safe_call() {
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; CHECK: middle.split:
@@ -1868,7 +1801,8 @@ define i64 @loop_contains_safe_call() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -1926,7 +1860,6 @@ define i64 @loop_contains_safe_div() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
@@ -1938,7 +1871,6 @@ define i64 @loop_contains_safe_div() {
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
; CHECK: middle.split:
@@ -1947,7 +1879,8 @@ define i64 @loop_contains_safe_div() {
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -2004,7 +1937,6 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP0]]
@@ -2018,7 +1950,6 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
; CHECK: middle.split:
@@ -2028,7 +1959,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -2086,7 +2018,6 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1023, i64 1022, i64 1021, i64 1020>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -2104,7 +2035,6 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 1020
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]]
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
; CHECK: middle.split:
@@ -2113,7 +2043,8 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: br i1 false, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = sub i64 1023, [[TMP12]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, [[MIDDLE_BLOCK]] ], [ 1023, [[ENTRY:%.*]] ]
@@ -2170,7 +2101,6 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -2184,7 +2114,6 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
; CHECK: middle.split:
@@ -2193,7 +2122,8 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
-; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]]
+; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]]
; CHECK-NEXT: br label [[LOOP_END]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
More information about the llvm-commits
mailing list