[llvm] 37809c8 - [VPlan] Support sinking VPScalarIVStepsRecipe.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 4 14:59:48 PST 2022
Author: Florian Hahn
Date: 2022-12-04T22:59:17Z
New Revision: 37809c867af5e0dc9fa677971b98f04d0b2aa15d
URL: https://github.com/llvm/llvm-project/commit/37809c867af5e0dc9fa677971b98f04d0b2aa15d
DIFF: https://github.com/llvm/llvm-project/commit/37809c867af5e0dc9fa677971b98f04d0b2aa15d.diff
LOG: [VPlan] Support sinking VPScalarIVStepsRecipe.
This patch extends VPlan-based sinking to also sink VPScalarIVStepsRecipe.
This takes us a step closer towards retiring the IR-based sinking.
The main change is extending VPScalarIVStepsRecipe::execute to support
executing in a replicate-region.
Depends on D133758.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D133760
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
llvm/test/Transforms/LoopVectorize/float-induction.ll
llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 051eb99684f85..9f6718b32f571 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2361,7 +2361,6 @@ static void buildScalarSteps(Value *ScalarIV, Value *Step,
// Determine the number of scalars we need to generate for each unroll
// iteration.
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(Def);
- unsigned Lanes = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
// Compute the scalar steps and save the results in State.
Type *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
ScalarIVTy->getScalarSizeInBits());
@@ -2375,7 +2374,17 @@ static void buildScalarSteps(Value *ScalarIV, Value *Step,
SplatIV = Builder.CreateVectorSplat(State.VF, ScalarIV);
}
- for (unsigned Part = 0; Part < State.UF; ++Part) {
+ unsigned StartPart = 0;
+ unsigned EndPart = State.UF;
+ unsigned StartLane = 0;
+ unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
+ if (State.Instance) {
+ StartPart = State.Instance->Part;
+ EndPart = StartPart + 1;
+ StartLane = State.Instance->Lane.getKnownLane();
+ EndLane = StartLane + 1;
+ }
+ for (unsigned Part = StartPart; Part < EndPart; ++Part) {
Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
if (!FirstLaneOnly && State.VF.isScalable()) {
@@ -2394,7 +2403,7 @@ static void buildScalarSteps(Value *ScalarIV, Value *Step,
if (ScalarIVTy->isFloatingPointTy())
StartIdx0 = Builder.CreateSIToFP(StartIdx0, ScalarIVTy);
- for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+ for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
Value *StartIdx = Builder.CreateBinOp(
AddOp, StartIdx0, getSignedIntOrFpConstant(ScalarIVTy, Lane));
// The step returned by `createStepForVF` is a runtime-evaluated value
@@ -9557,8 +9566,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
}
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "VPScalarIVStepsRecipe being replicated.");
-
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
if (IndDesc.getInductionBinOp() &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b3c543f0f8ba1..dac8b4e7941d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -111,28 +111,32 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
bool Changed = false;
// First, collect the operands of all predicated replicate recipes as seeds
// for sinking.
- SetVector<std::pair<VPBasicBlock *, VPValue *>> WorkList;
+ SetVector<std::pair<VPBasicBlock *, VPRecipeBase *>> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (auto &Recipe : *VPBB) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
if (!RepR || !RepR->isPredicated())
continue;
for (VPValue *Op : RepR->operands())
- WorkList.insert(std::make_pair(RepR->getParent(), Op));
+ if (auto *Def = Op->getDefiningRecipe())
+ WorkList.insert(std::make_pair(RepR->getParent(), Def));
}
}
- // Try to sink each replicate recipe in the worklist.
+ // Try to sink each replicate or scalar IV steps recipe in the worklist.
while (!WorkList.empty()) {
VPBasicBlock *SinkTo;
- VPValue *C;
- std::tie(SinkTo, C) = WorkList.pop_back_val();
- auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def);
- if (!SinkCandidate || SinkCandidate->isUniform() ||
- SinkCandidate->getParent() == SinkTo ||
+ VPRecipeBase *SinkCandidate;
+ std::tie(SinkTo, SinkCandidate) = WorkList.pop_back_val();
+ if (SinkCandidate->getParent() == SinkTo ||
SinkCandidate->mayHaveSideEffects() ||
SinkCandidate->mayReadOrWriteMemory())
continue;
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
+ if (RepR->isUniform())
+ continue;
+ } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
+ continue;
bool NeedsDuplicating = false;
// All recipe users of the sink candidate must be in the same block SinkTo
@@ -146,27 +150,31 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
return false;
if (UI->getParent() == SinkTo)
return true;
- NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
- return NeedsDuplicating;
+ NeedsDuplicating =
+ UI->onlyFirstLaneUsed(SinkCandidate->getVPSingleValue());
+ // We only know how to duplicate VPReplicateRecipes for now.
+ return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
};
- if (!all_of(SinkCandidate->users(), CanSinkWithUser))
+ if (!all_of(SinkCandidate->getVPSingleValue()->users(), CanSinkWithUser))
continue;
if (NeedsDuplicating) {
- Instruction *I = cast<Instruction>(SinkCandidate->getUnderlyingValue());
+ Instruction *I = cast<Instruction>(
+ cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue());
auto *Clone =
new VPReplicateRecipe(I, SinkCandidate->operands(), true, false);
// TODO: add ".cloned" suffix to name of Clone's VPValue.
Clone->insertBefore(SinkCandidate);
- SmallVector<VPUser *, 4> Users(SinkCandidate->users());
+ SmallVector<VPUser *, 4> Users(
+ SinkCandidate->getVPSingleValue()->users());
for (auto *U : Users) {
auto *UI = cast<VPRecipeBase>(U);
if (UI->getParent() == SinkTo)
continue;
for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) {
- if (UI->getOperand(Idx) != SinkCandidate)
+ if (UI->getOperand(Idx) != SinkCandidate->getVPSingleValue())
continue;
UI->setOperand(Idx, Clone);
}
@@ -174,7 +182,8 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
}
SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
for (VPValue *Op : SinkCandidate->operands())
- WorkList.insert(std::make_pair(SinkTo, Op));
+ if (auto *Def = Op->getDefiningRecipe())
+ WorkList.insert(std::make_pair(SinkTo, Def));
Changed = true;
}
return Changed;
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index d229c32a200c7..821cf8fbdcd04 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -117,7 +117,6 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
@@ -137,6 +136,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, i32* %ptr) optsize {
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
; CHECK-NEXT: Successor(s): pred.store.continue
@@ -376,7 +376,6 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
@@ -413,6 +412,7 @@ define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<[[PRED]]>
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep>
; CHECK-NEXT: Successor(s): pred.store.continue
@@ -468,7 +468,6 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, i32* %src, i32* noalia
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%.pn> = phi ir<0>, ir<[[L:%.+]]>
; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<2> + vp<[[CAN_IV]]> * ir<1>
-; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[WIDE_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule vp<[[WIDE_IV]]> vp<[[BTC]]>
; CHECK-NEXT: Successor(s): loop.0
@@ -488,6 +487,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, i32* %src, i32* noalia
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x>
+; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%val>, ir<%gep.dst>
; CHECK-NEXT: Successor(s): pred.store.continue
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 3e48d4e4f758f..1bd5ad19fb059 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -1408,8 +1408,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; VEC4_INTERL1: pred.store.if3:
-; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
+; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
; VEC4_INTERL1-NEXT: store float [[TMP7]], float* [[TMP9]], align 4
; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE3]]
@@ -1417,8 +1417,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
; VEC4_INTERL1-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; VEC4_INTERL1: pred.store.if5:
-; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2
+; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
; VEC4_INTERL1-NEXT: store float [[TMP11]], float* [[TMP13]], align 4
; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE5]]
@@ -1426,8 +1426,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
; VEC4_INTERL1-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
; VEC4_INTERL1: pred.store.if7:
-; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3
+; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; VEC4_INTERL1-NEXT: store float [[TMP15]], float* [[TMP17]], align 4
; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE7]]
@@ -1491,8 +1491,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1
; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; VEC4_INTERL2: pred.store.if4:
-; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1
+; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
; VEC4_INTERL2-NEXT: store float [[TMP11]], float* [[TMP13]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE4]]
@@ -1500,8 +1500,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2
; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; VEC4_INTERL2: pred.store.if6:
-; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2
+; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; VEC4_INTERL2-NEXT: store float [[TMP15]], float* [[TMP17]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE6]]
@@ -1509,8 +1509,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3
; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; VEC4_INTERL2: pred.store.if8:
-; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3
+; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
; VEC4_INTERL2-NEXT: store float [[TMP19]], float* [[TMP21]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE8]]
@@ -1526,8 +1526,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1
; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; VEC4_INTERL2: pred.store.if12:
-; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5
+; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
; VEC4_INTERL2-NEXT: store float [[TMP26]], float* [[TMP28]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE12]]
@@ -1535,8 +1535,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2
; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; VEC4_INTERL2: pred.store.if14:
-; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6
+; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
; VEC4_INTERL2-NEXT: store float [[TMP30]], float* [[TMP32]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE14]]
@@ -1544,8 +1544,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3
; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]]
; VEC4_INTERL2: pred.store.if16:
-; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7
+; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]]
; VEC4_INTERL2-NEXT: store float [[TMP34]], float* [[TMP36]], align 4
; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE16]]
@@ -1662,8 +1662,8 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; VEC2_INTERL1_PRED_STORE: pred.store.if3:
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP7]], float* [[TMP9]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE3]]
diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
index 8e7bd9c34aff7..f73c5959ee8c3 100644
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -48,7 +48,6 @@ for.end:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[COND:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%cond0> = icmp ult ir<%iv>, ir<13>
; CHECK-NEXT: WIDEN-SELECT ir<%s> = select ir<%cond0>, ir<10>, ir<20>
@@ -60,6 +59,7 @@ for.end:
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep>
; CHECK-NEXT: Successor(s): pred.store.continue
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index c74032fb1c604..1e695e696bbb3 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -89,7 +89,6 @@ exit:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: Successor(s): pred.load
@@ -99,6 +98,7 @@ exit:
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK: pred.load.if:
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
; CHECK-NEXT: Successor(s): pred.load.continue
@@ -168,7 +168,6 @@ exit:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: Successor(s): pred.load
@@ -178,6 +177,7 @@ exit:
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK: pred.load.if:
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
; CHECK-NEXT: Successor(s): pred.load.continue
@@ -250,7 +250,6 @@ define void @uniform_gep(i64 %k, i16* noalias %A, i16* noalias %B) {
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 21, %iv.next, ir<1>
; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<21> + vp<[[CAN_IV]]> * ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]> vp<[[BTC]]>
; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr ir<%A>, ir<0>
@@ -269,6 +268,7 @@ define void @uniform_gep(i64 %k, i16* noalias %A, i16* noalias %B) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep.B> = getelementptr ir<%B>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%lv>, ir<%gep.B>
; CHECK-NEXT: Successor(s): pred.store.continue
@@ -325,7 +325,6 @@ define void @pred_cfg1(i32 %k, i32 %j) {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%c.1> = icmp ult ir<%iv>, ir<%j>
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
@@ -341,6 +340,7 @@ define void @pred_cfg1(i32 %k, i32 %j) {
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.if:
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
; CHECK-NEXT: Successor(s): pred.load.continue
@@ -426,7 +426,6 @@ define void @pred_cfg2(i32 %k, i32 %j) {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
@@ -443,6 +442,7 @@ define void @pred_cfg2(i32 %k, i32 %j) {
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.if:
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
; CHECK-NEXT: Successor(s): pred.load.continue
@@ -542,7 +542,6 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
-; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10>
; CHECK-NEXT: WIDEN ir<%c.0> = icmp ult ir<%iv>, ir<%j>
@@ -558,6 +557,7 @@ define void @pred_cfg3(i32 %k, i32 %j) {
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.load.if:
+; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
; CHECK-NEXT: Successor(s): pred.load.continue
More information about the llvm-commits
mailing list