[llvm] [VPlan] Fix wrap-flags on WidenInduction unroll (PR #187710)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 20 07:42:59 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
Due to a relatively recent change, IntOrFpInduction recipes carry associated VPIRFlags. The VPlanUnroll logic for WidenInduction recipes predates this change and computes incorrect wrap-flags: update it to simply reuse the flags present on IntOrFpInduction recipes. PointerInduction recipes have no associated flags, and indeed no flags should be used for them.
Unfortunately, the change requires us to disable instcombine fixpoint-verification in the test induction.ll.
---
Patch is 164.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/187710.diff
48 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+14)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+3-5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+12-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll (+11-11)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr54634.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/assume.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+57-57)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-non-const-iv-start.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+30-30)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll (+36-36)
- (modified) llvm/test/Transforms/LoopVectorize/metadata.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/predicate-switch.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll (+9-6)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-assume.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-inductions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9ed9d07151d7f..d2760cc107269 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1030,6 +1030,20 @@ class VPIRFlags {
}
}
+ bool hasNoWrapFlags() const {
+ switch (OpType) {
+ case OperationType::OverflowingBinOp:
+ case OperationType::Trunc:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ WrapFlagsTy getNoWrapFlags() const {
+ return {hasNoUnsignedWrap(), hasNoSignedWrap()};
+ }
+
bool isDisjoint() const {
assert(OpType == OperationType::DisjointOp &&
"recipe cannot have a disjoing flag");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c76b78c0cbb4f..e736507fe36e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3409,11 +3409,9 @@ void VPlanTransforms::addExplicitVectorLength(
OpVPEVL = Builder.createScalarZExtOrTrunc(
OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
- auto *NextIter = Builder.createAdd(OpVPEVL, CurrentIteration,
- CanonicalIVIncrement->getDebugLoc(),
- "current.iteration.next",
- {CanonicalIVIncrement->hasNoUnsignedWrap(),
- CanonicalIVIncrement->hasNoSignedWrap()});
+ auto *NextIter = Builder.createAdd(
+ OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
+ "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
CurrentIteration->addOperand(NextIter);
VPValue *NextAVL =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 22d4973a4c1d8..c90947e888cf7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -179,17 +179,22 @@ void UnrollState::unrollWidenInductionByUF(
IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
Type *IVTy = TypeInfo.inferScalarType(IV);
auto &ID = IV->getInductionDescriptor();
- VPIRFlags Flags;
- if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
- Flags = ID.getInductionBinOp()->getFastMathFlags();
+ FastMathFlags FMF;
+ VPIRFlags::WrapFlagsTy WrapFlags(false, false);
+ if (auto *IntOrFPInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(IV)) {
+ if (IntOrFPInd->hasFastMathFlags())
+ FMF = IntOrFPInd->getFastMathFlags();
+ if (IntOrFPInd->hasNoWrapFlags())
+ WrapFlags = IntOrFPInd->getNoWrapFlags();
+ }
VPValue *ScalarStep = IV->getStepValue();
VPBuilder Builder(PH);
Type *VectorStepTy =
IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
VPInstruction *VectorStep = Builder.createNaryOp(
- VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
- Flags, IV->getDebugLoc());
+ VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy, FMF,
+ IV->getDebugLoc());
ToSkip.insert(VectorStep);
@@ -214,12 +219,10 @@ void UnrollState::unrollWidenInductionByUF(
AddFlags = GEPNoWrapFlags::none();
} else if (IVTy->isFloatingPointTy()) {
AddOpc = ID.getInductionOpcode();
- AddFlags = Flags; // FMF flags
+ AddFlags = FMF;
} else {
AddOpc = Instruction::Add;
- AddFlags = VPIRFlags::getDefaultFlags(AddOpc);
- if (cast<VPWidenIntOrFpInductionRecipe>(IV)->isCanonical())
- AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false);
+ AddFlags = WrapFlags;
}
for (unsigned Part = 1; Part != UF; ++Part) {
std::string Name =
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index e5efddaac9d1a..4105ea4f78284 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1106,7 +1106,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 1)
; DEFAULT-NEXT: [[TMP1:%.*]] = trunc nuw nsw <4 x i64> [[TMP0]] to <4 x i32>
; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
index 4eced1640bd14..ffb733aa4a5b5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
@@ -24,7 +24,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <16 x i8> [[VEC_IND]], splat (i8 16)
; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
@@ -84,12 +84,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP1]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3
@@ -199,12 +199,12 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 {
; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC8]]
; CHECK-NEXT: br i1 [[CMP_N20]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i64 [ [[N_VEC8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX20:%.*]] = phi i32 [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i64 [ [[N_VEC8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i32 [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL19]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX20]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[START]], 0
; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[RED_NEXT]] = select i1 [[C]], i32 [[IV_TRUNC]], i32 [[RED]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index eea496303a206..4cd1a0207fcf2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -115,7 +115,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4
@@ -198,7 +198,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
@@ -281,7 +281,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10)
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10)
@@ -397,7 +397,7 @@ define void @test_widen_truncated_induction(ptr %A) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i8> [[VEC_IND]], splat (i8 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2
; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP1]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 25e18692da8cd..29241fe83401a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -65,7 +65,7 @@ define void @test_invar_gep(ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP22:%.*]] = add nuw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
+; IC2-NEXT: [[TMP22:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
; IC2-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; IC2-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -179,7 +179,7 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP9]]
+; IC2-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP9]]
; IC2-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 4
; IC2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1
@@ -303,7 +303,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP13]]
+; IC2-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP13]]
; IC2-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 4
; IC2-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
index e5886d83c0182..fed7d634ab541 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
@@ -13,7 +13,7 @@ define i64 @reduction(i64 %arg) #0 {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[VEC_PHI]], splat (i32 1)
; CHECK-NEXT: [[TMP1]] = or <4 x i32> [[VEC_PHI2]], splat (i32 1)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
index 2c82cae589036..269bf2e8b3157 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
@@ -164,9 +164,9 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i32> [[STEP_ADD]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i32> [[STEP_ADD_2]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 6
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index 0f18b29b25d6d..f11a78f624037 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -152,7 +152,7 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
@@ -205,9 +205,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
index 17a64357b9116..133025f7b01be 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
@@ -18,7 +18,7 @@ define i64 @test_vecto...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/187710
More information about the llvm-commits
mailing list