[llvm] f108c6c - [VPlan] Fold (MUL A, 1) -> A as VPlan2VPlan transform.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 18 13:45:42 PDT 2023
Author: Florian Hahn
Date: 2023-09-18T21:45:34+01:00
New Revision: f108c6cdc1d0cd60f7d50cdb61ed6307258e5f14
URL: https://github.com/llvm/llvm-project/commit/f108c6cdc1d0cd60f7d50cdb61ed6307258e5f14
DIFF: https://github.com/llvm/llvm-project/commit/f108c6cdc1d0cd60f7d50cdb61ed6307258e5f14.diff
LOG: [VPlan] Fold (MUL A, 1) -> A as VPlan2VPlan transform.
Add a first VPlan-based recipe simplification to fold (MUL A, 1) -> A.
Among other things, this enables additional simplifications after
applying versioned strides, as a follow-up to D147783.
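For example, once a versioned stride is known to be the constant 1, the
widened index computation reduces to a plain add. A sketch of the effect,
taken from the CHECK lines updated in the ARM test below:

  ; before: the index is still multiplied by the versioned stride
  %t1 = mul nuw nsw i32 %t0, 1
  %t2 = add nuw nsw i32 %t1, 2
  ; after the (MUL A, 1) -> A fold, the multiply disappears
  %t1 = add nuw nsw i32 %t0, 2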
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D159200
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2c14219f6a304cb..592063520a25b4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -799,11 +799,55 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
+/// Returns true if \p V is a constant one.
+static bool isConstantOne(VPValue *V) {
+ if (!V->isLiveIn())
+ return false;
+ auto *C = dyn_cast<ConstantInt>(V->getLiveInIRValue());
+ return C && C->isOne();
+}
+
+/// Returns the llvm::Instruction opcode for \p R.
+static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
+ if (auto *WidenR = dyn_cast<VPWidenRecipe>(&R))
+ return WidenR->getUnderlyingInstr()->getOpcode();
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
+ return RepR->getUnderlyingInstr()->getOpcode();
+ if (auto *VPI = dyn_cast<VPInstruction>(&R))
+ return VPI->getOpcode();
+ return 0;
+}
+
+/// Try to simplify recipe \p R.
+static void simplifyRecipe(VPRecipeBase &R) {
+ unsigned Opcode = getOpcodeForRecipe(R);
+ if (Opcode == Instruction::Mul) {
+ VPValue *A = R.getOperand(0);
+ VPValue *B = R.getOperand(1);
+ if (isConstantOne(A))
+ return R.getVPSingleValue()->replaceAllUsesWith(B);
+ if (isConstantOne(B))
+ return R.getVPSingleValue()->replaceAllUsesWith(A);
+ }
+}
+
+/// Try to simplify the recipes in \p Plan.
+static void simplifyRecipes(VPlan &Plan) {
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ simplifyRecipe(R);
+ }
+ }
+}
+
void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
removeRedundantCanonicalIVs(Plan);
removeRedundantInductionCasts(Plan);
optimizeInductions(Plan, SE);
+ simplifyRecipes(Plan);
removeDeadRecipes(Plan);
createAndOptimizeReplicateRegions(Plan);
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
index a02c68df84cf2c4..d6c643df955a7b5 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -16,18 +16,17 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP7]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -38,8 +37,8 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 1
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
-; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP9]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
@@ -341,18 +340,17 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
-; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP4]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP5]], ptr [[TMP7]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
+; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -363,8 +361,8 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], [[STRIDE]]
; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
-; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP9]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]]
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]]
; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index e73fda44195fc86..db7cfee9cd6b089 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -233,17 +233,16 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; NOSTRIDED: vector.body:
; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NOSTRIDED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
-; NOSTRIDED-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[TMP5]], 1
-; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP6]]
-; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 0
-; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
-; NOSTRIDED-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[TMP8]], align 4
-; NOSTRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
-; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; NOSTRIDED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; NOSTRIDED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; NOSTRIDED-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP5]]
+; NOSTRIDED-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
+; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
+; NOSTRIDED-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP8]], ptr [[TMP7]], align 4
+; NOSTRIDED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
+; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
+; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NOSTRIDED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NOSTRIDED: middle.block:
; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -450,19 +449,18 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED: vector.body:
; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NOSTRIDED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
-; NOSTRIDED-NEXT: [[TMP10:%.*]] = mul nuw nsw i64 [[TMP9]], 1
-; NOSTRIDED-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP10]]
-; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0
-; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP12]], align 4
-; NOSTRIDED-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP10]]
-; NOSTRIDED-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0
-; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; NOSTRIDED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 4
-; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]]
-; NOSTRIDED-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; NOSTRIDED-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; NOSTRIDED-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP9]]
+; NOSTRIDED-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
+; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4
+; NOSTRIDED-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; NOSTRIDED-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP9]]
+; NOSTRIDED-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 0
+; NOSTRIDED-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
+; NOSTRIDED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; NOSTRIDED-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
+; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
+; NOSTRIDED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NOSTRIDED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; NOSTRIDED: middle.block:
; NOSTRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; NOSTRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
index b6c6eff83d5b25b..3e2122355d81b03 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
@@ -17,19 +17,17 @@ define void @test(ptr %A, i32 %x) {
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP1]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
-; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
+; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 undef, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph: