[llvm] [LV] Optimise latch exit induction users for some early exit loops (PR #128880)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 06:22:25 PST 2025
https://github.com/david-arm created https://github.com/llvm/llvm-project/pull/128880
This is the first of two PRs that attempts to improve the IR
generated in the exit blocks of vectorised loops with uncountable
early exits. In this PR I am improving the generated code for
users of induction variables in early exit loops that have a
unique exit block, when exiting via the latch.
I intend to follow this up very soon with another patch to
optimise the code for induction users in the vector.early.exit
block.
>From 69b05feb8d3449418ac980c294b04262176cddaa Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 26 Feb 2025 14:02:36 +0000
Subject: [PATCH] [LV] Optimise latch exit induction users for some early exit
loops
This is the first of two PRs that attempts to improve the IR
generated in the exit blocks of vectorised loops with uncountable
early exits. In this PR I am improving the generated code for
users of induction variables in early exit loops that have a
unique exit block, when exiting via the latch.
I intend to follow this up very soon with another patch to
optimise the code for induction users in the vector.early.exit
block.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 115 ++++++++++--------
.../single_early_exit_live_outs.ll | 44 ++-----
2 files changed, 69 insertions(+), 90 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8e773272300b1..bf643b0a51631 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -730,67 +730,74 @@ static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
return IsWideIVInc() ? WideIV : nullptr;
}
-void VPlanTransforms::optimizeInductionExitUsers(
- VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
+static VPValue *
+optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
+ VPBlockBase *PredVPBB, VPValue *Op,
+ DenseMap<VPValue *, VPValue *> &EndValues) {
using namespace VPlanPatternMatch;
- SmallVector<VPIRBasicBlock *> ExitVPBBs(Plan.getExitBlocks());
- if (ExitVPBBs.size() != 1)
- return;
- VPIRBasicBlock *ExitVPBB = ExitVPBBs[0];
- VPBlockBase *PredVPBB = ExitVPBB->getSinglePredecessor();
- if (!PredVPBB)
- return;
- assert(PredVPBB == Plan.getMiddleBlock() &&
- "predecessor must be the middle block");
-
- VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
- VPBuilder B(Plan.getMiddleBlock()->getTerminator());
- for (VPRecipeBase &R : *ExitVPBB) {
- auto *ExitIRI = cast<VPIRInstruction>(&R);
- if (!isa<PHINode>(ExitIRI->getInstruction()))
- break;
+ VPValue *Incoming;
+ if (!match(Op, m_VPInstruction<VPInstruction::ExtractFromEnd>(
+ m_VPValue(Incoming), m_SpecificInt(1))))
+ return nullptr;
- VPValue *Incoming;
- if (!match(ExitIRI->getOperand(0),
- m_VPInstruction<VPInstruction::ExtractFromEnd>(
- m_VPValue(Incoming), m_SpecificInt(1))))
- continue;
+ auto *WideIV = getOptimizableIVOf(Incoming);
+ if (!WideIV)
+ return nullptr;
- auto *WideIV = getOptimizableIVOf(Incoming);
- if (!WideIV)
- continue;
- VPValue *EndValue = EndValues.lookup(WideIV);
- assert(EndValue && "end value must have been pre-computed");
+ VPValue *EndValue = EndValues.lookup(WideIV);
+ assert(EndValue && "end value must have been pre-computed");
+
+ // This only happens if Incoming is the increment of an induction recipe.
+ if (Incoming != WideIV)
+ return EndValue;
+
+ // Otherwise subtract the step from the EndValue.
+ VPBuilder B(cast<VPBasicBlock>(PredVPBB)->getTerminator());
+ VPValue *Step = WideIV->getStepValue();
+ Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
+ VPValue *Escape = nullptr;
+ if (ScalarTy->isIntegerTy()) {
+ Escape =
+ B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
+ } else if (ScalarTy->isPointerTy()) {
+ auto *Zero = Plan.getOrAddLiveIn(
+ ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
+ Escape =
+ B.createPtrAdd(EndValue, B.createNaryOp(Instruction::Sub, {Zero, Step}),
+ {}, "ind.escape");
+ } else if (ScalarTy->isFloatingPointTy()) {
+ const auto &ID = WideIV->getInductionDescriptor();
+ Escape = B.createNaryOp(
+ ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
+ ? Instruction::FSub
+ : Instruction::FAdd,
+ {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
+ } else {
+ llvm_unreachable("all possible induction types must be handled");
+ }
+ return Escape;
+}
- if (Incoming != WideIV) {
- ExitIRI->setOperand(0, EndValue);
- continue;
- }
+void VPlanTransforms::optimizeInductionExitUsers(
+ VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
+ VPBlockBase *MiddleVPBB = Plan.getMiddleBlock();
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
+ for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
+ for (VPRecipeBase &R : *ExitVPBB) {
+ auto *ExitIRI = cast<VPIRInstruction>(&R);
+ if (!isa<PHINode>(ExitIRI->getInstruction()))
+ break;
- VPValue *Escape = nullptr;
- VPValue *Step = WideIV->getStepValue();
- Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
- if (ScalarTy->isIntegerTy()) {
- Escape =
- B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
- } else if (ScalarTy->isPointerTy()) {
- auto *Zero = Plan.getOrAddLiveIn(
- ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
- Escape = B.createPtrAdd(EndValue,
- B.createNaryOp(Instruction::Sub, {Zero, Step}),
- {}, "ind.escape");
- } else if (ScalarTy->isFloatingPointTy()) {
- const auto &ID = WideIV->getInductionDescriptor();
- Escape = B.createNaryOp(
- ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
- ? Instruction::FSub
- : Instruction::FAdd,
- {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
- } else {
- llvm_unreachable("all possible induction types must be handled");
+ for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
+ if (PredVPBB == MiddleVPBB)
+ if (VPValue *Escape = optimizeLatchExitInductionUser(
+ Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx),
+ EndValues))
+ ExitIRI->setOperand(Idx, Escape);
+ // TODO: Optimize early exit induction users in follow-on patch.
+ }
}
- ExitIRI->setOperand(0, Escape);
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index e24c6090b704b..616dd3c2caa4a 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -351,7 +351,6 @@ define i64 @same_exit_block_pre_inc_use2() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -365,11 +364,9 @@ define i64 @same_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.split:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
@@ -391,7 +388,7 @@ define i64 @same_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -451,7 +448,6 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.split:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
@@ -475,7 +471,7 @@ define i64 @same_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
;
entry:
@@ -597,9 +593,6 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
@@ -607,10 +600,6 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
@@ -642,7 +631,7 @@ define i64 @same_exit_block_post_inc_use() {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -684,7 +673,6 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
@@ -709,11 +697,9 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true)
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.split:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
@@ -737,7 +723,7 @@ define i64 @same_exit_block_post_inc_use2() {
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -869,7 +855,6 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -883,13 +868,11 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]]
@@ -912,7 +895,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ], [ 67, [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL2]]
;
entry:
@@ -978,7 +961,6 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
@@ -1003,7 +985,7 @@ define i64 @diff_exit_block_pre_inc_use3() {
; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[INDEX_LCSSA]]
; CHECK: loop.end:
-; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[INDEX_LCSSA1]]
;
entry:
@@ -1050,9 +1032,6 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
@@ -1060,10 +1039,6 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
@@ -1098,7 +1073,7 @@ define i64 @diff_exit_block_post_inc_use1() {
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL2]]
;
entry:
@@ -1144,7 +1119,6 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
@@ -1169,13 +1143,11 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP16]], splat (i1 true)
; CHECK-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP19]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP18]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: vector.early.exit:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 true)
@@ -1200,7 +1172,7 @@ define i64 @diff_exit_block_post_inc_use2() {
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL2]]
;
entry:
More information about the llvm-commits
mailing list