[llvm] [VPlan] Fix wrap-flags on WidenInduction unroll (PR #187710)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 20 07:42:59 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
Due to a relatively recent change, IntOrFpInduction recipes carry associated VPIRFlags. The VPlanUnroll logic for WidenInduction recipes predates this change and computes incorrect wrap-flags: update it to simply reuse the flags present on IntOrFpInduction recipes. PointerInduction recipes have no associated flags, and indeed no flags should be used for them.
Unfortunately, the change requires us to disable instcombine fixpoint-verification in the test induction.ll.
---
Patch is 164.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/187710.diff
48 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+14)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+3-5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+12-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll (+11-11)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr54634.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/assume.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+57-57)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-non-const-iv-start.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+30-30)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll (+36-36)
- (modified) llvm/test/Transforms/LoopVectorize/metadata.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/predicate-switch.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll (+9-6)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-assume.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-inductions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9ed9d07151d7f..d2760cc107269 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1030,6 +1030,20 @@ class VPIRFlags {
}
}
+ bool hasNoWrapFlags() const {
+ switch (OpType) {
+ case OperationType::OverflowingBinOp:
+ case OperationType::Trunc:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ WrapFlagsTy getNoWrapFlags() const {
+ return {hasNoUnsignedWrap(), hasNoSignedWrap()};
+ }
+
bool isDisjoint() const {
assert(OpType == OperationType::DisjointOp &&
"recipe cannot have a disjoing flag");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c76b78c0cbb4f..e736507fe36e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3409,11 +3409,9 @@ void VPlanTransforms::addExplicitVectorLength(
OpVPEVL = Builder.createScalarZExtOrTrunc(
OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
- auto *NextIter = Builder.createAdd(OpVPEVL, CurrentIteration,
- CanonicalIVIncrement->getDebugLoc(),
- "current.iteration.next",
- {CanonicalIVIncrement->hasNoUnsignedWrap(),
- CanonicalIVIncrement->hasNoSignedWrap()});
+ auto *NextIter = Builder.createAdd(
+ OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
+ "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
CurrentIteration->addOperand(NextIter);
VPValue *NextAVL =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 22d4973a4c1d8..c90947e888cf7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -179,17 +179,22 @@ void UnrollState::unrollWidenInductionByUF(
IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
Type *IVTy = TypeInfo.inferScalarType(IV);
auto &ID = IV->getInductionDescriptor();
- VPIRFlags Flags;
- if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
- Flags = ID.getInductionBinOp()->getFastMathFlags();
+ FastMathFlags FMF;
+ VPIRFlags::WrapFlagsTy WrapFlags(false, false);
+ if (auto *IntOrFPInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(IV)) {
+ if (IntOrFPInd->hasFastMathFlags())
+ FMF = IntOrFPInd->getFastMathFlags();
+ if (IntOrFPInd->hasNoWrapFlags())
+ WrapFlags = IntOrFPInd->getNoWrapFlags();
+ }
VPValue *ScalarStep = IV->getStepValue();
VPBuilder Builder(PH);
Type *VectorStepTy =
IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
VPInstruction *VectorStep = Builder.createNaryOp(
- VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
- Flags, IV->getDebugLoc());
+ VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy, FMF,
+ IV->getDebugLoc());
ToSkip.insert(VectorStep);
@@ -214,12 +219,10 @@ void UnrollState::unrollWidenInductionByUF(
AddFlags = GEPNoWrapFlags::none();
} else if (IVTy->isFloatingPointTy()) {
AddOpc = ID.getInductionOpcode();
- AddFlags = Flags; // FMF flags
+ AddFlags = FMF;
} else {
AddOpc = Instruction::Add;
- AddFlags = VPIRFlags::getDefaultFlags(AddOpc);
- if (cast<VPWidenIntOrFpInductionRecipe>(IV)->isCanonical())
- AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false);
+ AddFlags = WrapFlags;
}
for (unsigned Part = 1; Part != UF; ++Part) {
std::string Name =
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index e5efddaac9d1a..4105ea4f78284 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1106,7 +1106,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 1)
; DEFAULT-NEXT: [[TMP1:%.*]] = trunc nuw nsw <4 x i64> [[TMP0]] to <4 x i32>
; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
index 4eced1640bd14..ffb733aa4a5b5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
@@ -24,7 +24,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <16 x i8> [[VEC_IND]], splat (i8 16)
; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
@@ -84,12 +84,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP1]], align 8
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3
@@ -199,12 +199,12 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 {
; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC8]]
; CHECK-NEXT: br i1 [[CMP_N20]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i64 [ [[N_VEC8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX20:%.*]] = phi i32 [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i64 [ [[N_VEC8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i32 [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL19]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX20]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[START]], 0
; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[RED_NEXT]] = select i1 [[C]], i32 [[IV_TRUNC]], i32 [[RED]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index eea496303a206..4cd1a0207fcf2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -115,7 +115,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4
@@ -198,7 +198,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
@@ -281,7 +281,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10)
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10)
@@ -397,7 +397,7 @@ define void @test_widen_truncated_induction(ptr %A) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i8> [[VEC_IND]], splat (i8 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2
; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP1]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index 25e18692da8cd..29241fe83401a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -65,7 +65,7 @@ define void @test_invar_gep(ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP22:%.*]] = add nuw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
+; IC2-NEXT: [[TMP22:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
; IC2-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; IC2-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
@@ -179,7 +179,7 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP9]]
+; IC2-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP9]]
; IC2-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 4
; IC2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1
@@ -303,7 +303,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP13]]
+; IC2-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP13]]
; IC2-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 4
; IC2-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
index e5886d83c0182..fed7d634ab541 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
@@ -13,7 +13,7 @@ define i64 @reduction(i64 %arg) #0 {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[VEC_PHI]], splat (i32 1)
; CHECK-NEXT: [[TMP1]] = or <4 x i32> [[VEC_PHI2]], splat (i32 1)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
index 2c82cae589036..269bf2e8b3157 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
@@ -164,9 +164,9 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i32> [[STEP_ADD]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i32> [[STEP_ADD_2]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 6
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index 0f18b29b25d6d..f11a78f624037 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -152,7 +152,7 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
@@ -205,9 +205,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
index 17a64357b9116..133025f7b01be 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
@@ -18,7 +18,7 @@ define i64 @test_vecto...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/187710
More information about the llvm-commits
mailing list