[clang] 462cb3c - [InstCombine] Infer nusw + nneg -> nuw for getelementptr (#111144)

via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 5 05:36:50 PST 2024


Author: Nikita Popov
Date: 2024-12-05T14:36:40+01:00
New Revision: 462cb3cd6cecd0511ecaf0e3ebcaba455ece587d

URL: https://github.com/llvm/llvm-project/commit/462cb3cd6cecd0511ecaf0e3ebcaba455ece587d
DIFF: https://github.com/llvm/llvm-project/commit/462cb3cd6cecd0511ecaf0e3ebcaba455ece587d.diff

LOG: [InstCombine] Infer nusw + nneg -> nuw for getelementptr (#111144)

If the gep is nusw (usually via inbounds) and the offset is
non-negative, we can infer nuw.

Proof: https://alive2.llvm.org/ce/z/ihztLy

Added: 
    

Modified: 
    clang/test/CodeGen/AArch64/ls64-inline-asm.c
    clang/test/CodeGen/AArch64/pure-scalable-args.c
    clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
    clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c
    clang/test/CodeGen/arm64_32-vaarg.c
    clang/test/CodeGen/attr-counted-by-pr110385.c
    clang/test/CodeGen/attr-counted-by.c
    clang/test/CodeGen/math-libcalls-tbaa.c
    clang/test/CodeGen/union-tbaa1.c
    clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
    clang/test/CodeGenCXX/auto-var-init.cpp
    clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
    clang/test/CodeGenCXX/microsoft-abi-typeid.cpp
    clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn.cl
    clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp
    clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Analysis/BasicAA/featuretest.ll
    llvm/test/Analysis/ValueTracking/phi-known-bits.ll
    llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
    llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
    llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
    llvm/test/Transforms/Coroutines/coro-async.ll
    llvm/test/Transforms/Coroutines/coro-retcon-alloca-opaque-ptr.ll
    llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
    llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
    llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
    llvm/test/Transforms/Coroutines/coro-swifterror.ll
    llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
    llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
    llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
    llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll
    llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
    llvm/test/Transforms/InstCombine/array.ll
    llvm/test/Transforms/InstCombine/assume-align.ll
    llvm/test/Transforms/InstCombine/assume-loop-align.ll
    llvm/test/Transforms/InstCombine/assume-redundant.ll
    llvm/test/Transforms/InstCombine/assume.ll
    llvm/test/Transforms/InstCombine/call-cast-target.ll
    llvm/test/Transforms/InstCombine/cast_phi.ll
    llvm/test/Transforms/InstCombine/cast_ptr.ll
    llvm/test/Transforms/InstCombine/catchswitch-phi.ll
    llvm/test/Transforms/InstCombine/compare-alloca.ll
    llvm/test/Transforms/InstCombine/compare-unescaped.ll
    llvm/test/Transforms/InstCombine/dependent-ivs.ll
    llvm/test/Transforms/InstCombine/extractvalue.ll
    llvm/test/Transforms/InstCombine/fmul.ll
    llvm/test/Transforms/InstCombine/fsh.ll
    llvm/test/Transforms/InstCombine/gep-addrspace.ll
    llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
    llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
    llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
    llvm/test/Transforms/InstCombine/gep-vector-indices.ll
    llvm/test/Transforms/InstCombine/gepphigep.ll
    llvm/test/Transforms/InstCombine/getelementptr.ll
    llvm/test/Transforms/InstCombine/icmp-custom-dl.ll
    llvm/test/Transforms/InstCombine/icmp-gep.ll
    llvm/test/Transforms/InstCombine/icmp.ll
    llvm/test/Transforms/InstCombine/inbounds-gep.ll
    llvm/test/Transforms/InstCombine/indexed-gep-compares.ll
    llvm/test/Transforms/InstCombine/intptr1.ll
    llvm/test/Transforms/InstCombine/intptr7.ll
    llvm/test/Transforms/InstCombine/load-bitcast-select.ll
    llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
    llvm/test/Transforms/InstCombine/memccpy.ll
    llvm/test/Transforms/InstCombine/memcpy_alloca.ll
    llvm/test/Transforms/InstCombine/mempcpy.ll
    llvm/test/Transforms/InstCombine/memset2.ll
    llvm/test/Transforms/InstCombine/opaque-ptr.ll
    llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
    llvm/test/Transforms/InstCombine/phi-timeout.ll
    llvm/test/Transforms/InstCombine/phi.ll
    llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
    llvm/test/Transforms/InstCombine/ptrmask.ll
    llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll
    llvm/test/Transforms/InstCombine/select-cmp-br.ll
    llvm/test/Transforms/InstCombine/select-gep.ll
    llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll
    llvm/test/Transforms/InstCombine/snprintf-2.ll
    llvm/test/Transforms/InstCombine/snprintf-3.ll
    llvm/test/Transforms/InstCombine/snprintf-4.ll
    llvm/test/Transforms/InstCombine/snprintf.ll
    llvm/test/Transforms/InstCombine/sprintf-1.ll
    llvm/test/Transforms/InstCombine/stpncpy-1.ll
    llvm/test/Transforms/InstCombine/str-int.ll
    llvm/test/Transforms/InstCombine/strlcpy-1.ll
    llvm/test/Transforms/InstCombine/strlen-1.ll
    llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll
    llvm/test/Transforms/InstCombine/sub.ll
    llvm/test/Transforms/InstCombine/unpack-fca.ll
    llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
    llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
    llvm/test/Transforms/InstCombine/vscale_gep.ll
    llvm/test/Transforms/InstCombine/wcslen-1.ll
    llvm/test/Transforms/InstCombine/wcslen-3.ll
    llvm/test/Transforms/InstCombine/wcslen-5.ll
    llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
    llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
    llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
    llvm/test/Transforms/LoopUnroll/peel-loop.ll
    llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
    llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-no-scalar-interleave.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll
    llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
    llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
    llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
    llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
    llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
    llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
    llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
    llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
    llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
    llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
    llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
    llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
    llvm/test/Transforms/LoopVectorize/float-induction.ll
    llvm/test/Transforms/LoopVectorize/forked-pointers.ll
    llvm/test/Transforms/LoopVectorize/histograms.ll
    llvm/test/Transforms/LoopVectorize/induction.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
    llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
    llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
    llvm/test/Transforms/LoopVectorize/loop-scalars.ll
    llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
    llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
    llvm/test/Transforms/LoopVectorize/reduction.ll
    llvm/test/Transforms/LoopVectorize/runtime-check.ll
    llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
    llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
    llvm/test/Transforms/LoopVectorize/vector-geps.ll
    llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
    llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
    llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
    llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
    llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
    llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
    llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
    llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll
    llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
    llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll
    llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
    llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
    llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
    llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
    llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
    llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll
    llvm/test/Transforms/PhaseOrdering/basic.ll
    llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
    llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll
    llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll
    llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
    llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
    llvm/test/Transforms/PhaseOrdering/lto-licm.ll
    llvm/test/Transforms/PhaseOrdering/pr39282.ll
    llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll
    llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
    llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll
    llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
    llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll
    llvm/test/Transforms/RewriteStatepointsForGC/intrinsics.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
    llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
    llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
    llvm/test/Transforms/SLPVectorizer/X86/opt.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
    llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
    llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/AArch64/ls64-inline-asm.c b/clang/test/CodeGen/AArch64/ls64-inline-asm.c
index a01393525bcd42..8aa0684dba14d0 100644
--- a/clang/test/CodeGen/AArch64/ls64-inline-asm.c
+++ b/clang/test/CodeGen/AArch64/ls64-inline-asm.c
@@ -5,7 +5,7 @@ struct foo { unsigned long long x[8]; };
 
 // CHECK-LABEL: @load(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR:%.*]]) #[[ATTR1:[0-9]+]], !srcloc !2
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[ADDR:%.*]]) #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]]
 // CHECK-NEXT:    store i512 [[TMP0]], ptr [[OUTPUT:%.*]], align 8
 // CHECK-NEXT:    ret void
 //
@@ -17,7 +17,7 @@ void load(struct foo *output, void *addr)
 // CHECK-LABEL: @store(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load i512, ptr [[INPUT:%.*]], align 8
-// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc !3
+// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[TMP0]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc [[META3:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 void store(const struct foo *input, void *addr)
@@ -29,25 +29,25 @@ void store(const struct foo *input, void *addr)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[IN:%.*]], align 4, !tbaa [[TBAA4:![0-9]+]]
 // CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP0]] to i64
-// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 4
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 16
+// CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 16
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CONV5:%.*]] = sext i32 [[TMP2]] to i64
-// CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 64
+// CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 64
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CONV8:%.*]] = sext i32 [[TMP3]] to i64
-// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 100
+// CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 100
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CONV11:%.*]] = sext i32 [[TMP4]] to i64
-// CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 144
+// CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 144
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CONV14:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 196
+// CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 196
 // CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CONV17:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 256
+// CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i64 256
 // CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CONV20:%.*]] = sext i32 [[TMP7]] to i64
 // CHECK-NEXT:    [[S_SROA_10_0_INSERT_EXT:%.*]] = zext i64 [[CONV20]] to i512
@@ -72,7 +72,7 @@ void store(const struct foo *input, void *addr)
 // CHECK-NEXT:    [[S_SROA_0_0_INSERT_EXT:%.*]] = zext i64 [[CONV]] to i512
 // CHECK-NEXT:    [[S_SROA_0_0_INSERT_MASK:%.*]] = or disjoint i512 [[S_SROA_4_0_INSERT_MASK]], [[S_SROA_4_0_INSERT_SHIFT]]
 // CHECK-NEXT:    [[S_SROA_0_0_INSERT_INSERT:%.*]] = or i512 [[S_SROA_0_0_INSERT_MASK]], [[S_SROA_0_0_INSERT_EXT]]
-// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc !8
+// CHECK-NEXT:    tail call void asm sideeffect "st64b $0,[$1]", "r,r,~{memory}"(i512 [[S_SROA_0_0_INSERT_INSERT]], ptr [[ADDR:%.*]]) #[[ATTR1]], !srcloc [[META8:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 void store2(int *in, void *addr)

diff  --git a/clang/test/CodeGen/AArch64/pure-scalable-args.c b/clang/test/CodeGen/AArch64/pure-scalable-args.c
index f40c944335e4a4..a8c3dd9288a5bb 100644
--- a/clang/test/CodeGen/AArch64/pure-scalable-args.c
+++ b/clang/test/CodeGen/AArch64/pure-scalable-args.c
@@ -417,7 +417,7 @@ void test_va_arg(int n, ...) {
 // CHECK-AAPCS-EMPTY:
 // CHECK-AAPCS-NEXT: vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
 // CHECK-AAPCS-NEXT:   %stack = load ptr, ptr %ap, align 8
-// CHECK-AAPCS-NEXT:   %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+// CHECK-AAPCS-NEXT:   %new_stack = getelementptr inbounds nuw i8, ptr %stack, i64 8
 // CHECK-AAPCS-NEXT:   store ptr %new_stack, ptr %ap, align 8
 // CHECK-AAPCS-NEXT:   br label %vaarg.end
 // CHECK-AAPCS-EMPTY:
@@ -428,7 +428,7 @@ void test_va_arg(int n, ...) {
 // CHECK-AAPCS-NEXT:   %vaarg.addr = load ptr, ptr %vaargs.addr, align 8
 
 // CHECK-AAPCS-NEXT:   %v.sroa.0.0.copyload = load <2 x i8>, ptr %vaarg.addr, align 16
-// CHECK-AAPCS-NEXT:   %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds i8, ptr %vaarg.addr, i64 48
+// CHECK-AAPCS-NEXT:   %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds nuw i8, ptr %vaarg.addr, i64 48
 // CHECK-AAPCS-NEXT:   %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0.vaarg.addr.sroa_idx, align 16
 // CHECK-AAPCS-NEXT:   call void @llvm.va_end.p0(ptr nonnull %ap)
 // CHECK-AAPCS-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %v.sroa.0.0.copyload, i64 0)
@@ -445,11 +445,11 @@ void test_va_arg(int n, ...) {
 // CHECK-DARWIN-NEXT:   call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ap)
 // CHECK-DARWIN-NEXT:   call void @llvm.va_start.p0(ptr nonnull %ap)
 // CHECK-DARWIN-NEXT:   %argp.cur = load ptr, ptr %ap, align 8
-// CHECK-DARWIN-NEXT:   %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8
+// CHECK-DARWIN-NEXT:   %argp.next = getelementptr inbounds nuw i8, ptr %argp.cur, i64 8
 // CHECK-DARWIN-NEXT:   store ptr %argp.next, ptr %ap, align 8
 // CHECK-DARWIN-NEXT:   %0 = load ptr, ptr %argp.cur, align 8
 // CHECK-DARWIN-NEXT:   %v.sroa.0.0.copyload = load <2 x i8>, ptr %0, align 16
-// CHECK-DARWIN-NEXT:   %v.sroa.43.0..sroa_idx = getelementptr inbounds i8, ptr %0, i64 48
+// CHECK-DARWIN-NEXT:   %v.sroa.43.0..sroa_idx = getelementptr inbounds nuw i8, ptr %0, i64 48
 // CHECK-DARWIN-NEXT:   %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0..sroa_idx, align 16
 // CHECK-DARWIN-NEXT:   call void @llvm.va_end.p0(ptr nonnull %ap)
 // CHECK-DARWIN-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %v.sroa.0.0.copyload, i64 0)

diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
index 5422d993ff1575..08ff936a0a797b 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
@@ -25,13 +25,13 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 0
 // CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[RESP]], i64 16
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16
 // CHECK-NEXT:    store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 2
-// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[RESP]], i64 32
+// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 32
 // CHECK-NEXT:    store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
 // CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 3
-// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[RESP]], i64 48
+// CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 48
 // CHECK-NEXT:    store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
 // CHECK-NEXT:    ret void
 //
@@ -60,7 +60,7 @@ void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsi
 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0
 // CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[RESP]], i64 16
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16
 // CHECK-NEXT:    store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
 // CHECK-NEXT:    ret void
 //
@@ -1072,7 +1072,7 @@ void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, uns
 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0
 // CHECK-NEXT:    store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[RESP]], i64 16
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[RESP]], i64 16
 // CHECK-NEXT:    store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
 // CHECK-NEXT:    ret void
 //

diff  --git a/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c b/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c
index 82bb7a52d05d11..33cf0ddb472365 100644
--- a/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c
+++ b/clang/test/CodeGen/SystemZ/zos-mixed-ptr-sizes.c
@@ -48,21 +48,21 @@ void test_indexing(struct Foo *f) {
 
 void test_indexing_2(struct Foo *f) {
   // X64-LABEL: define void @test_indexing_2(ptr noundef initializes((16, 24)) %f)
-  // X64: getelementptr inbounds i8, ptr addrspace(1) {{%[0-9]}}, i32 16
-  // X64: getelementptr inbounds i8, ptr {{%[0-9]}}, i64 24
+  // X64: getelementptr inbounds nuw i8, ptr addrspace(1) {{%[0-9]}}, i32 16
+  // X64: getelementptr inbounds nuw i8, ptr {{%[0-9]}}, i64 24
   f->cp64 = ((char *** __ptr32 *)1028)[1][2][3];
   use_foo(f);
 }
 
 unsigned long* test_misc() {
   // X64-LABEL: define ptr @test_misc()
-  // X64: %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %0, i32 88
+  // X64: %arrayidx = getelementptr inbounds nuw i8, ptr addrspace(1) %0, i32 88
   // X64-NEXT: %1 = load ptr, ptr addrspace(1) %arrayidx
-  // X64-NEXT: %arrayidx1 = getelementptr inbounds i8, ptr %1, i64 8
+  // X64-NEXT: %arrayidx1 = getelementptr inbounds nuw i8, ptr %1, i64 8
   // X64-NEXT: %2 = load ptr, ptr %arrayidx1
-  // X64-NEXT: %arrayidx2 = getelementptr inbounds i8, ptr %2, i64 904
+  // X64-NEXT: %arrayidx2 = getelementptr inbounds nuw i8, ptr %2, i64 904
   // X64-NEXT: %3 = load ptr, ptr %arrayidx2
-  // X64-NEXT: %arrayidx3 = getelementptr inbounds i8, ptr %3, i64 1192
+  // X64-NEXT: %arrayidx3 = getelementptr inbounds nuw i8, ptr %3, i64 1192
   unsigned long* x = (unsigned long*)((char***** __ptr32*)1208)[0][11][1][113][149];
   return x;
 }
@@ -71,9 +71,9 @@ char* __ptr32* __ptr32 test_misc_2() {
   // X64-LABEL: define ptr addrspace(1) @test_misc_2()
   // X64: br i1 %cmp, label %if.then, label %if.end
   // X64: %1 = load ptr addrspace(1), ptr inttoptr (i64 16 to ptr)
-  // X64-NEXT: %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %1, i32 544
+  // X64-NEXT: %arrayidx = getelementptr inbounds nuw i8, ptr addrspace(1) %1, i32 544
   // X64-NEXT: %2 = load ptr addrspace(1), ptr addrspace(1) %arrayidx
-  // X64-NEXT: %arrayidx1 = getelementptr inbounds i8, ptr addrspace(1) %2, i32 24
+  // X64-NEXT: %arrayidx1 = getelementptr inbounds nuw i8, ptr addrspace(1) %2, i32 24
   // X64-NEXT: %3 = load ptr addrspace(1), ptr addrspace(1) %arrayidx1
   // X64-NEXT: store ptr addrspace(1) %3, ptr @test_misc_2.res
   // X64: ret ptr addrspace(1)
@@ -88,7 +88,7 @@ unsigned short test_misc_3() {
   // X64-LABEL: define zeroext i16 @test_misc_3()
   // X64: %0 = load ptr addrspace(1), ptr inttoptr (i64 548 to ptr)
   // X64-NEXT: %1 = addrspacecast ptr addrspace(1) %0 to ptr
-  // X64-NEXT: %arrayidx = getelementptr inbounds i8, ptr %1, i64 36
+  // X64-NEXT: %arrayidx = getelementptr inbounds nuw i8, ptr %1, i64 36
   // X64-NEXT: %2 = load i16, ptr %arrayidx, align 2
   // X64-NEXT: ret i16 %2
   unsigned short this_asid = ((unsigned short*)(*(char* __ptr32*)(0x224)))[18];
@@ -97,10 +97,10 @@ unsigned short test_misc_3() {
 
 int test_misc_4() {
   // X64-LABEL: define signext range(i32 0, 2) i32 @test_misc_4()
-  // X64: getelementptr inbounds i8, ptr addrspace(1) {{%[0-9]}}, i32 88
-  // X64: getelementptr inbounds i8, ptr {{%[0-9]}}, i64 8
-  // X64: getelementptr inbounds i8, ptr {{%[0-9]}}, i64 984
-  // X64: getelementptr inbounds i8, ptr %3, i64 80
+  // X64: getelementptr inbounds nuw i8, ptr addrspace(1) {{%[0-9]}}, i32 88
+  // X64: getelementptr inbounds nuw i8, ptr {{%[0-9]}}, i64 8
+  // X64: getelementptr inbounds nuw i8, ptr {{%[0-9]}}, i64 984
+  // X64: getelementptr inbounds nuw i8, ptr %3, i64 80
   // X64: icmp sgt i32 {{.*[0-9]}}, 67240703
   // X64: ret i32
   int a = (*(int*)(80 + ((char**** __ptr32*)1208)[0][11][1][123]) > 0x040202FF);
@@ -189,7 +189,7 @@ int test_function_ptr32_is_32bit() {
 int get_processor_count() {
   // X64-LABEL: define signext range(i32 -128, 128) i32 @get_processor_count()
   // X64: load ptr addrspace(1), ptr inttoptr (i64 16 to ptr)
-  // X64-NEXT: [[ARR_IDX1:%[a-z].*]] = getelementptr inbounds i8, ptr addrspace(1) %0, i32 660
+  // X64-NEXT: [[ARR_IDX1:%[a-z].*]] = getelementptr inbounds nuw i8, ptr addrspace(1) %0, i32 660
   // X64: load ptr addrspace(1), ptr addrspace(1) [[ARR_IDX1]]
   // X64: load i8, ptr addrspace(1) {{%[a-z].*}}
   // X64: sext i8 {{%[0-9]}} to i32

diff  --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
index 3f1f4443436da1..72c23d4967d2d3 100644
--- a/clang/test/CodeGen/arm64_32-vaarg.c
+++ b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -10,7 +10,7 @@ typedef struct {
 int test_int(OneInt input, va_list *mylist) {
 // CHECK-LABEL: define{{.*}} i32 @test_int(i32 %input
 // CHECK: [[START:%.*]] = load ptr, ptr %mylist
-// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[START]], i32 4
+// CHECK: [[NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i32 4
 // CHECK: store ptr [[NEXT]], ptr %mylist
 
 // CHECK: [[RES:%.*]] = load i32, ptr [[START]]
@@ -28,9 +28,9 @@ typedef struct {
 long long test_longlong(OneLongLong input, va_list *mylist) {
   // CHECK-LABEL: define{{.*}} i64 @test_longlong(i64 %input
   // CHECK: [[STARTPTR:%.*]] = load ptr, ptr %mylist
-  // CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds i8, ptr [[STARTPTR]], i32 7
+  // CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds nuw i8, ptr [[STARTPTR]], i32 7
   // CHECK: [[ALIGNED_ADDR:%.+]] = tail call align 8 ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
-  // CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_ADDR]], i32 8
+  // CHECK: [[NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[ALIGNED_ADDR]], i32 8
   // CHECK: store ptr [[NEXT]], ptr %mylist
 
   // CHECK: [[RES:%.*]] = load i64, ptr [[ALIGNED_ADDR]]
@@ -49,7 +49,7 @@ float test_hfa(va_list *mylist) {
 // CHECK-LABEL: define{{.*}} float @test_hfa
 // CHECK: [[START:%.*]] = load ptr, ptr %mylist
 
-// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[START]], i32 16
+// CHECK: [[NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i32 16
 // CHECK: store ptr [[NEXT]], ptr %mylist
 
 // CHECK: [[RES:%.*]] = load float, ptr [[START]]
@@ -76,7 +76,7 @@ typedef struct {
 long long test_bigstruct(BigStruct input, va_list *mylist) {
 // CHECK-LABEL: define{{.*}} i64 @test_bigstruct(ptr
 // CHECK: [[START:%.*]] = load ptr, ptr %mylist
-// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[START]], i32 4
+// CHECK: [[NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i32 4
 // CHECK: store ptr [[NEXT]], ptr %mylist
 
 // CHECK: [[ADDR:%.*]] = load ptr, ptr [[START]]
@@ -97,7 +97,7 @@ short test_threeshorts(ThreeShorts input, va_list *mylist) {
 // CHECK-LABEL: define{{.*}} signext i16 @test_threeshorts([2 x i32] %input
 
 // CHECK: [[START:%.*]] = load ptr, ptr %mylist
-// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[START]], i32 8
+// CHECK: [[NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i32 8
 // CHECK: store ptr [[NEXT]], ptr %mylist
 
 // CHECK: [[RES:%.*]] = load i16, ptr [[START]]

diff  --git a/clang/test/CodeGen/attr-counted-by-pr110385.c b/clang/test/CodeGen/attr-counted-by-pr110385.c
index e120dcc583578d..c2ff032334fe27 100644
--- a/clang/test/CodeGen/attr-counted-by-pr110385.c
+++ b/clang/test/CodeGen/attr-counted-by-pr110385.c
@@ -31,7 +31,7 @@ void init(void * __attribute__((pass_dynamic_object_size(0))));
 // CHECK-NEXT:    [[GROWABLE:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[GROWABLE]], align 8, !tbaa [[TBAA2:![0-9]+]]
 // CHECK-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
-// CHECK-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+// CHECK-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 // CHECK-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
 // CHECK-NEXT:    [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 1
@@ -48,7 +48,7 @@ void test1(struct bucket *foo) {
 // CHECK-SAME: ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 16
-// CHECK-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 12
+// CHECK-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 12
 // CHECK-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
 // CHECK-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 1

diff  --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c
index a5efe35181b5eb..c11502c9240d78 100644
--- a/clang/test/CodeGen/attr-counted-by.c
+++ b/clang/test/CodeGen/attr-counted-by.c
@@ -60,13 +60,13 @@ struct anon_struct {
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2:![0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9:[0-9]+]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB1:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8:[0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -108,13 +108,13 @@ void test1(struct annotated *p, int index, int val) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[DOT_COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB3:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -127,7 +127,7 @@ void test1(struct annotated *p, int index, int val) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test2(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DOT_COUNTED_BY_LOAD]], i32 0)
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = shl i32 [[TMP0]], 2
@@ -159,7 +159,7 @@ void test2(struct annotated *p, size_t index) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos(
 // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
@@ -170,7 +170,7 @@ void test2(struct annotated *p, size_t index) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test2_bdos(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl nsw i64 [[TMP0]], 2
@@ -195,13 +195,13 @@ size_t test2_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test3(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -266,7 +266,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
@@ -274,7 +274,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont4:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD]], 2
@@ -291,7 +291,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP7:%.*]] = icmp ult i64 [[IDXPROM12]], [[TMP6]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP7]], label [[CONT19:%.*]], label [[HANDLER_OUT_OF_BOUNDS15:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds15:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM12]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB6:[0-9]+]], i64 [[IDXPROM12]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont19:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[DOT_COUNTED_BY_LOAD6]], 3
@@ -308,7 +308,7 @@ size_t test3_bdos(struct annotated *p) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP13:%.*]] = icmp ult i64 [[IDXPROM28]], [[TMP12]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP13]], label [[CONT35:%.*]], label [[HANDLER_OUT_OF_BOUNDS31:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds31:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM28]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB7:[0-9]+]], i64 [[IDXPROM28]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont35:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [0 x i32], ptr [[ARRAY]], i64 0, i64 [[IDXPROM28]]
@@ -328,7 +328,7 @@ size_t test3_bdos(struct annotated *p) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test4(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[INDEX:%.*]], i32 noundef [[FAM_IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = shl i32 [[DOT_COUNTED_BY_LOAD]], 2
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 244
@@ -410,7 +410,7 @@ void test4(struct annotated *p, int index, int fam_idx) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos(
 // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
@@ -425,7 +425,7 @@ void test4(struct annotated *p, int index, int fam_idx) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -17179869180, 17179869181) i64 @test4_bdos(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = sext i32 [[DOT_COUNTED_BY_LOAD]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
@@ -455,12 +455,12 @@ size_t test4_bdos(struct annotated *p, int index) {
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[DOTCOUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
@@ -526,13 +526,13 @@ size_t test5_bdos(struct anon_struct *p) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test6(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[DOT_COUNTED_BY_LOAD]], [[IDXPROM]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 16
@@ -546,7 +546,7 @@ size_t test5_bdos(struct anon_struct *p) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test6(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.smax.i64(i64 [[DOT_COUNTED_BY_LOAD]], i64 0)
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOTTR:%.*]] = trunc i64 [[TMP0]] to i32
@@ -582,7 +582,7 @@ void test6(struct anon_struct *p, int index) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -3) i64 @test6_bdos(
 // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.smax.i64(i64 [[DOT_COUNTED_BY_LOAD]], i64 0)
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
@@ -591,7 +591,7 @@ void test6(struct anon_struct *p, int index) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, -3) i64 @test6_bdos(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i64, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.smax.i64(i64 [[DOT_COUNTED_BY_LOAD]], i64 0)
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 2
@@ -614,14 +614,14 @@ size_t test6_bdos(struct anon_struct *p) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test7(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i8, ptr [[TMP0]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i8 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
 // SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
@@ -687,17 +687,17 @@ size_t test7_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test8(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i8, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i8 [[DOT_COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB12:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
-// SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9
+// SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[INTS]], i64 0, i64 [[IDXPROM]]
 // SANITIZE-WITH-ATTR-NEXT:    store i8 [[DOT_COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA8]]
 // SANITIZE-WITH-ATTR-NEXT:    ret void
@@ -705,9 +705,9 @@ size_t test7_bdos(struct union_of_fams *p) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test8(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i8, ptr [[DOT_COUNTED_BY_GEP]], align 4
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 9
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[INTS:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 9
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[INTS]], i64 0, i64 [[IDXPROM]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    store i8 [[DOT_COUNTED_BY_LOAD]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
@@ -738,7 +738,7 @@ void test8(struct union_of_fams *p, int index) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos(
 // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i8, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i8 [[DOT_COUNTED_BY_LOAD]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP0]]
@@ -746,7 +746,7 @@ void test8(struct union_of_fams *p, int index) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 256) i64 @test8_bdos(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i8, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i8 [[DOT_COUNTED_BY_LOAD]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP0]]
@@ -768,14 +768,14 @@ size_t test8_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test9(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[TMP0]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB14:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB14:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
 // SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -841,17 +841,17 @@ size_t test9_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local void @test10(
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[DOT_COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT7:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont7:
-// SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
+// SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[BYTES]], i64 0, i64 [[IDXPROM]]
 // SANITIZE-WITH-ATTR-NEXT:    [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DOT_COUNTED_BY_LOAD]], i32 0)
 // SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = trunc i32 [[NARROW]] to i8
@@ -861,11 +861,11 @@ size_t test9_bdos(struct union_of_fams *p) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test10(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR1]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DOT_COUNTED_BY_LOAD]], i32 0)
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = trunc i32 [[NARROW]] to i8
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[BYTES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[BYTES]], i64 0, i64 [[IDXPROM]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    store i8 [[CONV]], ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA6]]
@@ -896,7 +896,7 @@ void test10(struct union_of_fams *p, int index) {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos(
 // SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DOT_COUNTED_BY_LOAD]], i32 0)
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[NARROW]] to i64
@@ -905,7 +905,7 @@ void test10(struct union_of_fams *p, int index) {
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 0, 2147483648) i64 @test10_bdos(
 // NO-SANITIZE-WITH-ATTR-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[NARROW:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DOT_COUNTED_BY_LOAD]], i32 0)
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[NARROW]] to i64
@@ -929,13 +929,13 @@ size_t test10_bdos(struct union_of_fams *p) {
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[INDEX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB16:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
@@ -1017,26 +1017,26 @@ int test12_a, test12_b;
 // SANITIZE-WITH-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR10:[0-9]+]]
+// SANITIZE-WITH-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR9:[0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT9:![0-9]+]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB18:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont:
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i32], ptr [[BAZ]], i64 0, i64 [[TMP1]]
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [6 x i32], ptr [[BAZ]], i64 0, i64 [[TMP1]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
 // SANITIZE-WITH-ATTR-NEXT:    store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[TBAA4]]
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr @test12_foo, align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds4:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 0) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB20:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.type_mismatch6:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB21:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local noundef i32 @test12(
@@ -1059,26 +1059,26 @@ int test12_a, test12_b;
 // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[BAZ:%.*]] = alloca [[STRUCT_HANG:%.*]], align 4
-// SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR7:[0-9]+]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[BAZ]]) #[[ATTR6:[0-9]+]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(24) [[BAZ]], ptr noundef nonnull align 4 dereferenceable(24) @test12_bar, i64 24, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[INDEX]], 6
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[INDEX]] to i64
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8:![0-9]+]], !nosanitize [[META9:![0-9]+]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[TMP1]]) #[[ATTR8:[0-9]+]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB2:[0-9]+]], i64 [[TMP1]]) #[[ATTR7:[0-9]+]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont:
-// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i32], ptr [[BAZ]], i64 0, i64 [[TMP1]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [6 x i32], ptr [[BAZ]], i64 0, i64 [[TMP1]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    store i32 [[TMP2]], ptr @test12_b, align 4, !tbaa [[TBAA2]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr @test12_foo, align 4
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[DOTCOUNTED_BY_LOAD]], 0
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[DOTNOT]], label [[HANDLER_OUT_OF_BOUNDS4:%.*]], label [[HANDLER_TYPE_MISMATCH6:%.*]], !prof [[PROF10:![0-9]+]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds4:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 0) #[[ATTR8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB4:[0-9]+]], i64 0) #[[ATTR7]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.type_mismatch6:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_type_mismatch_v1_abort(ptr nonnull @[[GLOB5:[0-9]+]], i64 ptrtoint (ptr getelementptr inbounds nuw (i8, ptr @test12_foo, i64 4) to i64)) #[[ATTR7]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 //
 // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local noundef i32 @test12(
@@ -1120,13 +1120,13 @@ struct test13_bar {
 // SANITIZE-WITH-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA11:![0-9]+]]
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[INDEX]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB24:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
@@ -1147,13 +1147,13 @@ struct test13_bar {
 // SANITIZE-WITHOUT-ATTR-SAME: i64 noundef [[INDEX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr @test13_f, align 8, !tbaa [[TBAA11:![0-9]+]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+// SANITIZE-WITHOUT-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP2]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[INDEX]]) #[[ATTR8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB8:[0-9]+]], i64 [[INDEX]]) #[[ATTR7]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont5:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[REVMAP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
@@ -1184,14 +1184,13 @@ struct test14_foo {
 // SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB25:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR:       trap:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR9]]
-// SANITIZE-WITH-ATTR-NEXT:    unreachable
+// SANITIZE-WITH-ATTR:       cont3:
+// SANITIZE-WITH-ATTR-NEXT:    ret i32 undef
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test14(
 // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
@@ -1210,14 +1209,13 @@ struct test14_foo {
 // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB9:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR:       trap:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR8]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    unreachable
+// SANITIZE-WITHOUT-ATTR:       cont3:
+// SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 undef
 //
 // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test14(
 // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
@@ -1240,14 +1238,13 @@ int test14(int idx) {
 // SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB27:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB27:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR:       trap:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR9]]
-// SANITIZE-WITH-ATTR-NEXT:    unreachable
+// SANITIZE-WITH-ATTR:       cont1:
+// SANITIZE-WITH-ATTR-NEXT:    ret i32 undef
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test15(
 // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
@@ -1261,14 +1258,13 @@ int test14(int idx) {
 // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[IDX]], 0
-// SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[TRAP:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT1:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB11:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR7]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR:       trap:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @llvm.trap() #[[ATTR8]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    unreachable
+// SANITIZE-WITHOUT-ATTR:       cont1:
+// SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 undef
 //
 // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test15(
 // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR1]] {
@@ -1420,36 +1416,36 @@ struct tests_foo {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test24(
 // SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 40
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 40
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT4:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 10) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB28:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont4:
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 84
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[TBAA4]]
 // SANITIZE-WITH-ATTR-NEXT:    ret i32 [[TMP1]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test24(
 // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr nocapture noundef readonly [[VAR:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 84
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i32 [[TMP0]]
 //
 // SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test24(
 // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
-// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 84
+// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 [[TMP0]]
 //
 // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i32 @test24(
 // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr nocapture noundef readonly [[VAR:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
-// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 84
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 84
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 [[TMP0]]
 //
@@ -1466,10 +1462,10 @@ int test24(int c, struct tests_foo *var) {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[DOTCOUNTED_BY_LOAD]], 10
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 10) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB29:[0-9]+]], i64 10) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA4]]
 // SANITIZE-WITH-ATTR-NEXT:    ret i32 [[TMP2]]
 //
@@ -1477,7 +1473,7 @@ int test24(int c, struct tests_foo *var) {
 // NO-SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr nocapture noundef readonly [[VAR:%.*]]) local_unnamed_addr #[[ATTR8:[0-9]+]] {
 // NO-SANITIZE-WITH-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA14:![0-9]+]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    ret i32 [[TMP1]]
 //
@@ -1485,7 +1481,7 @@ int test24(int c, struct tests_foo *var) {
 // SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[VAR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA17:![0-9]+]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44
+// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 [[TMP1]]
 //
@@ -1493,7 +1489,7 @@ int test24(int c, struct tests_foo *var) {
 // NO-SANITIZE-WITHOUT-ATTR-SAME: i32 noundef [[C:%.*]], ptr nocapture noundef readonly [[VAR:%.*]]) local_unnamed_addr #[[ATTR7:[0-9]+]] {
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8, !tbaa [[TBAA14:![0-9]+]]
-// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 44
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 44
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret i32 [[TMP1]]
 //
@@ -1511,14 +1507,14 @@ struct test26_foo {
 // SANITIZE-WITH-ATTR-LABEL: define dso_local i32 @test26(
 // SANITIZE-WITH-ATTR-SAME: i32 noundef [[C:%.*]], ptr noundef [[FOO:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
-// SANITIZE-WITH-ATTR-NEXT:    [[S:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4
+// SANITIZE-WITH-ATTR-NEXT:    [[S:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 4
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[C]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[S]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT5:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB30:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont5:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 8
@@ -1583,13 +1579,13 @@ struct test27_foo {
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]], i32 noundef [[I:%.*]], i32 noundef [[J:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP0]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP1]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB32:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
 // SANITIZE-WITH-ATTR-NEXT:    [[ENTRIES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 24
@@ -1649,13 +1645,13 @@ struct test28_foo {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8, !tbaa [[TBAA21]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA21]]
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I]] to i64
-// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOTCOUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOTCOUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = zext i32 [[DOTCOUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[IDXPROM]], [[TMP3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP4]], label [[CONT17:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB34:[0-9]+]], i64 [[IDXPROM]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont17:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
@@ -1717,19 +1713,19 @@ struct annotated_struct_array {
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX1]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT3:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB36:[0-9]+]], i64 [[TMP1]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB36:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont3:
-// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]]
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA23:![0-9]+]]
-// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
 // SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM15:%.*]] = sext i32 [[IDX2]] to i64
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP3:%.*]] = zext i32 [[DOT_COUNTED_BY_LOAD]] to i64, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[IDXPROM15]], [[TMP3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    br i1 [[TMP4]], label [[CONT20:%.*]], label [[HANDLER_OUT_OF_BOUNDS16:%.*]], !prof [[PROF3]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       handler.out_of_bounds16:
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB37:[0-9]+]], i64 [[IDXPROM15]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR:       cont20:
 // SANITIZE-WITH-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
@@ -1745,7 +1741,7 @@ struct annotated_struct_array {
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX1]] to i64
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[IDXPROM]]
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA20:![0-9]+]]
-// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[DOT_COUNTED_BY_LOAD:%.*]] = load i32, ptr [[DOT_COUNTED_BY_GEP]], align 4
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[DOT_COUNTED_BY_LOAD]], i32 0)
 // NO-SANITIZE-WITH-ATTR-NEXT:    [[CONV:%.*]] = shl i32 [[TMP1]], 2
@@ -1762,10 +1758,10 @@ struct annotated_struct_array {
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP1:%.*]] = zext i32 [[IDX1]] to i64
 // SANITIZE-WITHOUT-ATTR-NEXT:    br i1 [[TMP0]], label [[CONT21:%.*]], label [[HANDLER_OUT_OF_BOUNDS:%.*]], !prof [[PROF8]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       handler.out_of_bounds:
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB13:[0-9]+]], i64 [[TMP1]]) #[[ATTR7]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR:       cont21:
-// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x ptr], ptr [[ANN]], i64 0, i64 [[TMP1]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA23:![0-9]+]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[ARRAY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 12
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[IDXPROM18:%.*]] = sext i32 [[IDX2]] to i64
@@ -1804,7 +1800,7 @@ struct test30_struct {
 // SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR4]] {
 // SANITIZE-WITH-ATTR-NEXT:  entry:
 // SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META2]]
-// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[TMP0]]) #[[ATTR9]], !nosanitize [[META2]]
+// SANITIZE-WITH-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB39:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META2]]
 // SANITIZE-WITH-ATTR-NEXT:    unreachable, !nosanitize [[META2]]
 //
 // NO-SANITIZE-WITH-ATTR-LABEL: define dso_local void @test30(
@@ -1820,7 +1816,7 @@ struct test30_struct {
 // SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]], i32 noundef [[IDX:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // SANITIZE-WITHOUT-ATTR-NEXT:  entry:
 // SANITIZE-WITHOUT-ATTR-NEXT:    [[TMP0:%.*]] = zext i32 [[IDX]] to i64, !nosanitize [[META9]]
-// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[TMP0]]) #[[ATTR8]], !nosanitize [[META9]]
+// SANITIZE-WITHOUT-ATTR-NEXT:    tail call void @__ubsan_handle_out_of_bounds_abort(ptr nonnull @[[GLOB15:[0-9]+]], i64 [[TMP0]]) #[[ATTR7]], !nosanitize [[META9]]
 // SANITIZE-WITHOUT-ATTR-NEXT:    unreachable, !nosanitize [[META9]]
 //
 // NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local void @test30(

diff  --git a/clang/test/CodeGen/math-libcalls-tbaa.c b/clang/test/CodeGen/math-libcalls-tbaa.c
index 9c86eea67d14d8..17a93ed2aed2d0 100644
--- a/clang/test/CodeGen/math-libcalls-tbaa.c
+++ b/clang/test/CodeGen/math-libcalls-tbaa.c
@@ -16,7 +16,7 @@ float crealf(float _Complex);
 // CHECK-LABEL: define dso_local float @test_expf(
 // CHECK-SAME: ptr nocapture noundef readonly [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[NUM]], i64 40
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2:![0-9]+]]
 // CHECK-NEXT:    [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9:[0-9]+]], !tbaa [[TBAA6:![0-9]+]]
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
@@ -31,7 +31,7 @@ float test_expf (float num[]) {
 // CHECK-LABEL: define dso_local float @test_builtin_expf(
 // CHECK-SAME: ptr nocapture noundef readonly [[NUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[NUM]], i64 40
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 40
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[CALL:%.*]] = tail call float @expf(float noundef [[TMP0]]) #[[ATTR9]], !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP0]], [[CALL]]
@@ -48,7 +48,7 @@ float test_builtin_expf (float num[]) {
 // CHECK-LABEL: define dso_local double @test_fabs(
 // CHECK-SAME: ptr nocapture noundef readonly [[NUM:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[NUM]], i64 80
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
 // CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8:![0-9]+]]
 // CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.fabs.f64(double [[TMP0]])
 // CHECK-NEXT:    [[MUL:%.*]] = fmul double [[TMP0]], [[TMP1]]
@@ -63,7 +63,7 @@ double test_fabs (double num[]) {
 // CHECK-LABEL: define dso_local double @test_remainder(
 // CHECK-SAME: ptr nocapture noundef readonly [[NUM:%.*]], double noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[NUM]], i64 80
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 80
 // CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    [[CALL:%.*]] = tail call double @remainder(double noundef [[TMP0]], double noundef [[A]]) #[[ATTR9]], !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
@@ -83,7 +83,7 @@ double test_remainder (double num[], double a) {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[E:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[E]]) #[[ATTR9]]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[NUM]], i64 16
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 16
 // CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    [[CALL:%.*]] = call double @frexp(double noundef [[TMP0]], ptr noundef nonnull [[E]]) #[[ATTR9]]
 // CHECK-NEXT:    [[MUL:%.*]] = fmul double [[TMP0]], [[CALL]]
@@ -107,7 +107,7 @@ double test_frexp (double num[]) {
 // CHECK-NEXT:    [[COS:%.*]] = alloca float, align 4
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[SIN]]) #[[ATTR9]]
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[COS]]) #[[ATTR9]]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[NUM]], i64 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    call void @sincos(float noundef [[TMP0]], ptr noundef nonnull [[SIN]], ptr noundef nonnull [[COS]]) #[[ATTR9]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[SIN]], align 4, !tbaa [[TBAA2]]
@@ -130,7 +130,7 @@ float test_sincos (float num[]) {
 // CHECK-LABEL: define dso_local float @test_cacoshf(
 // CHECK-SAME: ptr nocapture noundef readonly [[NUM:%.*]]) local_unnamed_addr #[[ATTR7]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[NUM]], i64 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[NUM]], i64 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[TMP0]], 0
 // CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x float] [[DOTFCA_0_INSERT]], float 0.000000e+00, 1

diff  --git a/clang/test/CodeGen/union-tbaa1.c b/clang/test/CodeGen/union-tbaa1.c
index 5263b1714c8c6e..0f7a67cb7eccd1 100644
--- a/clang/test/CodeGen/union-tbaa1.c
+++ b/clang/test/CodeGen/union-tbaa1.c
@@ -27,10 +27,10 @@ void bar(vect32 p[][2]);
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[VEC]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds [4 x [2 x %union.vect32]], ptr [[TMP]], i32 0, i32 [[TMP4]], i32 1
-// CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX14]], i32 2
+// CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX14]], i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2, !tbaa [[TBAA6]]
 // CHECK-NEXT:    [[CONV16:%.*]] = zext i16 [[TMP5]] to i32
-// CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds i8, ptr [[VEC]], i32 4
+// CHECK-NEXT:    [[ARRAYIDX17:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i32 4
 // CHECK-NEXT:    store i32 [[CONV16]], ptr [[ARRAYIDX17]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    call void @bar(ptr noundef nonnull [[TMP]]) #[[ATTR3]]
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[TMP]]) #[[ATTR3]]

diff  --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index 838bdda8257286..334b33c872bbc8 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -331,7 +331,7 @@ struct S {
 // OPT-NEXT:  [[ENTRY:.*:]]
 // OPT-NEXT:    [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4:![0-9]+]]
 // OPT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
-// OPT-NEXT:    [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8
+// OPT-NEXT:    [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
 // OPT-NEXT:    [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
 // OPT-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
 // OPT-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]
@@ -438,7 +438,7 @@ __global__ void kernel4(struct S s) {
 // OPT-NEXT:    [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 // OPT-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP1]], 1
 // OPT-NEXT:    store i32 [[INC]], ptr [[TMP0]], align 4
-// OPT-NEXT:    [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8
+// OPT-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[S_COERCE]], i64 8
 // OPT-NEXT:    [[TMP2:%.*]] = load ptr, ptr addrspace(1) [[Y]], align 8
 // OPT-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
 // OPT-NEXT:    [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
@@ -539,7 +539,7 @@ struct T {
 // OPT-NEXT:  [[ENTRY:.*:]]
 // OPT-NEXT:    [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4]]
 // OPT-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
-// OPT-NEXT:    [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8
+// OPT-NEXT:    [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP0]], i64 8
 // OPT-NEXT:    [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
 // OPT-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
 // OPT-NEXT:    [[TMP3:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]

diff  --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp
index 7c323c2d368c0e..cf92ad62d72844 100644
--- a/clang/test/CodeGenCXX/auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/auto-var-init.cpp
@@ -1296,7 +1296,7 @@ TEST_CUSTOM(semivolatile, semivolatile, { 0x44444444, 0x44444444 });
 // CHECK-NOT:   !annotation
 // CHECK-O0:  call void @{{.*}}used{{.*}}%custom)
 // PATTERN-O1:       store i32 1145324612, ptr %custom, align 4
-// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 4
+// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
 // PATTERN-O1-NEXT:  store i32 1145324612, ptr %[[I]], align 4
 // ZERO-O1:          store i64 4919131752989213764, ptr %custom, align 8
 // CHECK-NOT:   !annotation
@@ -1494,11 +1494,11 @@ TEST_CUSTOM(unmatchedreverse, unmatchedreverse, { .c = 42  });
 // CHECK-NOT:   !annotation
 // CHECK-O0:    call void @{{.*}}used{{.*}}%custom)
 // PATTERN-O1:  store i8 42, ptr {{.*}}, align 4
-// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1
+// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 1
 // PATTERN-O1-NEXT:  store i8 -86, ptr %[[I]], align {{.*}}
-// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 2
+// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 2
 // PATTERN-O1-NEXT:  store i8 -86, ptr %[[I]], align {{.*}}
-// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 3
+// PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, i64 3
 // PATTERN-O1-NEXT:  store i8 -86, ptr %[[I]], align {{.*}}
 // ZERO-O1:     store i32 42, ptr {{.*}}, align 4
 

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
index 4b046acf0d8789..a5d0d5ad6e91f6 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp
@@ -18,7 +18,7 @@ T* test1(V* x) { return &dynamic_cast<T&>(*x); }
 T* test2(A* x) { return &dynamic_cast<T&>(*x); }
 // CHECK-LABEL: define dso_local noundef ptr @"?test2@@YAPAUT@@PAUA@@@Z"(ptr noundef %x)
 // CHECK:        [[VBTBL:%.*]] = load ptr, ptr %x, align 4
-// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
+// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds nuw i8, ptr [[VBTBL]], i32 4
 // CHECK-NEXT:   [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
 // CHECK-NEXT:   [[ADJ:%.*]] = getelementptr inbounds i8, ptr %x, i32 [[VBOFFS]]
 // CHECK-NEXT:   [[CALL:%.*]] = tail call ptr @__RTDynamicCast(ptr nonnull [[ADJ]], i32 [[VBOFFS]], ptr nonnull @"??_R0?AUA@@@8", ptr nonnull @"??_R0?AUT@@@8", i32 1)
@@ -26,9 +26,9 @@ T* test2(A* x) { return &dynamic_cast<T&>(*x); }
 
 T* test3(B* x) { return &dynamic_cast<T&>(*x); }
 // CHECK-LABEL: define dso_local noundef ptr @"?test3@@YAPAUT@@PAUB@@@Z"(ptr noundef %x)
-// CHECK:        [[VBPTR:%.*]] = getelementptr inbounds i8, ptr %x, i32 4
+// CHECK:        [[VBPTR:%.*]] = getelementptr inbounds nuw i8, ptr %x, i32 4
 // CHECK-NEXT:   [[VBTBL:%.*]] = load ptr, ptr [[VBPTR:%.*]], align 4
-// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
+// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds nuw i8, ptr [[VBTBL]], i32 4
 // CHECK-NEXT:   [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
 // CHECK-NEXT:   [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4
 // CHECK-NEXT:   [[ADJ:%.*]] = getelementptr inbounds i8, ptr %x, i32 [[DELTA]]
@@ -45,7 +45,7 @@ T* test5(A* x) { return dynamic_cast<T*>(x); }
 // CHECK:        [[CHECK:%.*]] = icmp eq ptr %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
 // CHECK:        [[VBTBL:%.*]] = load ptr, ptr %x, align 4
-// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
+// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds nuw i8, ptr [[VBTBL]], i32 4
 // CHECK-NEXT:   [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
 // CHECK-NEXT:   [[ADJ:%.*]] = getelementptr inbounds i8, ptr %x, i32 [[VBOFFS]]
 // CHECK-NEXT:   [[CALL:%.*]] = tail call ptr @__RTDynamicCast(ptr nonnull [[ADJ]], i32 [[VBOFFS]], ptr {{.*}}@"??_R0?AUA@@@8", ptr {{.*}}@"??_R0?AUT@@@8", i32 0)
@@ -57,9 +57,9 @@ T* test6(B* x) { return dynamic_cast<T*>(x); }
 // CHECK-LABEL: define dso_local noundef ptr @"?test6@@YAPAUT@@PAUB@@@Z"(ptr noundef %x)
 // CHECK:        [[CHECK:%.*]] = icmp eq ptr %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
-// CHECK:        [[VBPTR:%.*]] = getelementptr inbounds i8, ptr %x, i32 4
+// CHECK:        [[VBPTR:%.*]] = getelementptr inbounds nuw i8, ptr %x, i32 4
 // CHECK-NEXT:   [[VBTBL:%.*]] = load ptr, ptr [[VBPTR]], align 4
-// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
+// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds nuw i8, ptr [[VBTBL]], i32 4
 // CHECK-NEXT:   [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
 // CHECK-NEXT:   [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4
 // CHECK-NEXT:   [[ADJ:%.*]] = getelementptr inbounds i8, ptr %x, i32 [[DELTA]]
@@ -78,7 +78,7 @@ void* test8(A* x) { return dynamic_cast<void*>(x); }
 // CHECK:        [[CHECK:%.*]] = icmp eq ptr %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
 // CHECK:        [[VBTBL:%.*]] = load ptr, ptr %x, align 4
-// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
+// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds nuw i8, ptr [[VBTBL]], i32 4
 // CHECK-NEXT:   [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
 // CHECK-NEXT:   [[ADJ:%.*]] = getelementptr inbounds i8, ptr %x, i32 [[VBOFFS]]
 // CHECK-NEXT:   [[RES:%.*]] = tail call ptr @__RTCastToVoid(ptr nonnull [[ADJ]])
@@ -90,9 +90,9 @@ void* test9(B* x) { return dynamic_cast<void*>(x); }
 // CHECK-LABEL: define dso_local noundef ptr @"?test9@@YAPAXPAUB@@@Z"(ptr noundef %x)
 // CHECK:        [[CHECK:%.*]] = icmp eq ptr %x, null
 // CHECK-NEXT:   br i1 [[CHECK]]
-// CHECK:        [[VBPTR:%.*]] = getelementptr inbounds i8, ptr %x, i32 4
+// CHECK:        [[VBPTR:%.*]] = getelementptr inbounds nuw i8, ptr %x, i32 4
 // CHECK-NEXT:   [[VBTBL:%.*]] = load ptr, ptr [[VBPTR]], align 4
-// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
+// CHECK-NEXT:   [[VBOFFP:%.*]] = getelementptr inbounds nuw i8, ptr [[VBTBL]], i32 4
 // CHECK-NEXT:   [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
 // CHECK-NEXT:   [[BASE:%.*]] = getelementptr i8, ptr %x, i32 [[VBOFFS]]
 // CHECK-NEXT:   [[ADJ:%.*]] = getelementptr i8, ptr [[BASE]], i32 4

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp
index 31d7d23fd6fc5c..24a19cd8566020 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp
@@ -31,7 +31,7 @@ const std::type_info* test3_typeid() { return &typeid(*fn()); }
 // CHECK:        call ptr @__RTtypeid(ptr null)
 // CHECK-NEXT:   unreachable
 // CHECK:        [[VBTBL:%.*]] = load ptr, ptr [[CALL]], align 4
-// CHECK-NEXT:   [[VBSLOT:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
+// CHECK-NEXT:   [[VBSLOT:%.*]] = getelementptr inbounds nuw i8, ptr [[VBTBL]], i32 4
 // CHECK-NEXT:   [[VBASE_OFFS:%.*]] = load i32, ptr [[VBSLOT]], align 4
 // CHECK-NEXT:   [[ADJ:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i32 [[VBASE_OFFS]]
 // CHECK-NEXT:   [[RT:%.*]] = tail call ptr @__RTtypeid(ptr nonnull [[ADJ]])

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
index 53cca49e87ef4b..a0c106bca83c97 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -513,9 +513,9 @@ typedef struct {
 
 // CHECK-LABEL: test_memset_private
 // CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) noundef align 8 {{.*}}, i8 0, i64 32, i1 false)
-// CHECK: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(5) %ptr, i32 32
+// CHECK: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) %ptr, i32 32
 // CHECK: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) [[GEP]]
-// CHECK: [[GEP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) {{.*}}, i32 36
+// CHECK: [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) {{.*}}, i32 36
 // CHECK: store i32 0, ptr addrspace(5) [[GEP1]], align 4
 void test_memset_private(private StructTy3 *ptr) {
   StructTy3 S3 = {0, 0, 0, 0, 0};

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index c22a43146a8c89..ded5f6b5ac4fd3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -620,11 +620,11 @@ void test_get_local_id(int d, global int *out)
 
 // CHECK-LABEL: @test_get_workgroup_size(
 // CHECK: {{.*}}call align 8 dereferenceable(256){{.*}} ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 12
+// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 12
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 4, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 14
+// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 14
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 16
+// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 16
 // CHECK: load i16, ptr addrspace(4) %{{.*}}, align 8, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 void test_get_workgroup_size(int d, global int *out)
 {
@@ -638,7 +638,7 @@ void test_get_workgroup_size(int d, global int *out)
 
 // CHECK-LABEL: @test_get_grid_size(
 // CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
+// CHECK: getelementptr inbounds nuw i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
 // CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !range [[$GRID_RANGE:![0-9]+]], !invariant.load
 void test_get_grid_size(int d, global int *out)
 {

diff  --git a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp
index 86f21ee556ce83..697e50e199a041 100644
--- a/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp
+++ b/clang/test/CodeGenOpenCLCXX/array-type-infinite-loop.clcpp
@@ -2,11 +2,11 @@
 //RUN: %clang_cc1 %s -triple spir -emit-llvm -O1 -o - | FileCheck %s
 
 // CHECK-LABEL: define dso_local spir_kernel void @test(
-// CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 8 [[IN:%.*]], ptr addrspace(1) nocapture noundef writeonly align 8 initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
+// CHECK-SAME: ptr addrspace(1) nocapture noundef readonly align 8 [[IN:%.*]], ptr addrspace(1) nocapture noundef writeonly align 8 initializes((0, 8)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[IN]], i32 8
-// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1]], align 8, !tbaa [[TBAA7:![0-9]+]]
-// CHECK-NEXT:    store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA7]]
+// CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[IN]], i32 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr addrspace(1) [[ARRAYIDX1]], align 8, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-NEXT:    store i64 [[TMP0]], ptr addrspace(1) [[OUT]], align 8, !tbaa [[TBAA8]]
 // CHECK-NEXT:    ret void
 //
 __kernel void test(__global long *In, __global long *Out) {
@@ -14,12 +14,12 @@ __kernel void test(__global long *In, __global long *Out) {
    *Out = m[1];
 }
 //.
-// CHECK: [[META3]] = !{i32 1, i32 1}
-// CHECK: [[META4]] = !{!"none", !"none"}
-// CHECK: [[META5]] = !{!"long*", !"long*"}
-// CHECK: [[META6]] = !{!"", !""}
-// CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
-// CHECK: [[META8]] = !{!"long", [[META9:![0-9]+]], i64 0}
-// CHECK: [[META9]] = !{!"omnipotent char", [[META10:![0-9]+]], i64 0}
-// CHECK: [[META10]] = !{!"Simple C++ TBAA"}
+// CHECK: [[META4]] = !{i32 1, i32 1}
+// CHECK: [[META5]] = !{!"none", !"none"}
+// CHECK: [[META6]] = !{!"long*", !"long*"}
+// CHECK: [[META7]] = !{!"", !""}
+// CHECK: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+// CHECK: [[META9]] = !{!"long", [[META10:![0-9]+]], i64 0}
+// CHECK: [[META10]] = !{!"omnipotent char", [[META11:![0-9]+]], i64 0}
+// CHECK: [[META11]] = !{!"Simple C++ TBAA"}
 //.

diff  --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
index d9591624a08f04..82dd07a1a63bb6 100644
--- a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -86,7 +86,7 @@ struct S {
 // CHECK-NEXT:    [[TMP18:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..6)
 // CHECK-NEXT:    [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA20:![0-9]+]]
 // CHECK-NEXT:    store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[TBAA23:![0-9]+]]
-// CHECK-NEXT:    [[AGG_CAPTURED3_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 8
+// CHECK-NEXT:    [[AGG_CAPTURED3_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP19]], i64 8
 // CHECK-NEXT:    store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA25:![0-9]+]]
 // CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
 // CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
@@ -144,7 +144,7 @@ struct S {
 // CHECK-NEXT:    [[TMP4:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..10)
 // CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA20]]
 // CHECK-NEXT:    store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[TBAA37:![0-9]+]]
-// CHECK-NEXT:    [[AGG_CAPTURED_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 8
+// CHECK-NEXT:    [[AGG_CAPTURED_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 8
 // CHECK-NEXT:    store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA23]]
 // CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 40
 // CHECK-NEXT:    store i64 0, ptr [[TMP6]], align 8, !tbaa [[TBAA15]]

diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3325a1868ebde4..6c60fefe02e653 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3113,6 +3113,15 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     }
   }
 
+  // nusw + nneg -> nuw
+  if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
+      all_of(GEP.indices(), [&](Value *Idx) {
+        return isKnownNonNegative(Idx, SQ.getWithInstruction(&GEP));
+      })) {
+    GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
+    return &GEP;
+  }
+
   if (Instruction *R = foldSelectGEP(GEP, Builder))
     return R;
 

diff  --git a/llvm/test/Analysis/BasicAA/featuretest.ll b/llvm/test/Analysis/BasicAA/featuretest.ll
index 5ef94cd8c7fdaf..fd5d2eff10f836 100644
--- a/llvm/test/Analysis/BasicAA/featuretest.ll
+++ b/llvm/test/Analysis/BasicAA/featuretest.ll
@@ -59,7 +59,7 @@ define i32 @constant_array_index_test() {
 ; CHECK-LABEL: @constant_array_index_test(
 ; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [100 x i32], align 4
 ; CHECK-NEXT:    call void @external(ptr nonnull [[ARRAY1]])
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[ARRAY1]], i64 24
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAY1]], i64 24
 ; CHECK-NEXT:    store i32 1, ptr [[P2]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;

diff  --git a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
index 8691e63a4f3ee1..11eda42dc623ba 100644
--- a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
+++ b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
@@ -557,7 +557,7 @@ while.end.i:
 define i1 @recursiveGEP_withPtrSub3(ptr %val1) {
 ; CHECK-LABEL: @recursiveGEP_withPtrSub3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TEST_VAL1:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 7
+; CHECK-NEXT:    [[TEST_VAL1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAL1:%.*]], i64 7
 ; CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
 ; CHECK:       while.cond.i:
 ; CHECK-NEXT:    [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[TEST_VAL1]], [[ENTRY:%.*]] ]
@@ -659,7 +659,7 @@ while.end.i:
 define i1 @recursiveGEP_withPtrSub1_notKnownNonEqual3(ptr %val1) {
 ; CHECK-LABEL: @recursiveGEP_withPtrSub1_notKnownNonEqual3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TEST_VAL1:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 5
+; CHECK-NEXT:    [[TEST_VAL1:%.*]] = getelementptr inbounds nuw i8, ptr [[VAL1:%.*]], i64 5
 ; CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
 ; CHECK:       while.cond.i:
 ; CHECK-NEXT:    [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[TEST_VAL1]], [[ENTRY:%.*]] ]
@@ -792,7 +792,7 @@ while.end.i:
 define i1 @recursiveGEP_withPtrSub_noninboundStepAndB(ptr %val1) {
 ; CHECK-LABEL: @recursiveGEP_withPtrSub_noninboundStepAndB(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TEST:%.*]] = getelementptr inbounds i8, ptr [[VAL1:%.*]], i64 2
+; CHECK-NEXT:    [[TEST:%.*]] = getelementptr inbounds nuw i8, ptr [[VAL1:%.*]], i64 2
 ; CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
 ; CHECK:       while.cond.i:
 ; CHECK-NEXT:    [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[TEST]], [[ENTRY:%.*]] ]

diff  --git a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
index e37b6ff10ffa96..3563e737f55203 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
@@ -5,7 +5,7 @@
 define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: @get_local_size_x(
 ; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
+; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
 ; GCN-NEXT:    [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
 ; GCN-NEXT:    store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
 ; GCN-NEXT:    ret void
@@ -25,7 +25,7 @@ define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
 define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: @get_local_size_y(
 ; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
+; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
 ; GCN-NEXT:    [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 2
 ; GCN-NEXT:    store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
 ; GCN-NEXT:    ret void
@@ -46,7 +46,7 @@ define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
 define amdgpu_kernel void @get_local_size_z(ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: @get_local_size_z(
 ; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
+; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
 ; GCN-NEXT:    [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
 ; GCN-NEXT:    store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
 ; GCN-NEXT:    ret void
@@ -106,7 +106,7 @@ define amdgpu_kernel void @get_remainder_z(ptr addrspace(1) %out) #0 {
 define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: @get_work_group_size_x(
 ; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; GCN-NEXT:    [[GEP_X:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
+; GCN-NEXT:    [[GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
 ; GCN-NEXT:    [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP_X]], align 4
 ; GCN-NEXT:    store i16 [[GROUP_SIZE_X]], ptr addrspace(1) [[OUT:%.*]], align 2
 ; GCN-NEXT:    ret void
@@ -122,7 +122,7 @@ define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
 define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: @get_work_group_size_y(
 ; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; GCN-NEXT:    [[GEP_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
+; GCN-NEXT:    [[GEP_Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
 ; GCN-NEXT:    [[GROUP_SIZE_Y:%.*]] = load i16, ptr addrspace(4) [[GEP_Y]], align 2
 ; GCN-NEXT:    store i16 [[GROUP_SIZE_Y]], ptr addrspace(1) [[OUT:%.*]], align 2
 ; GCN-NEXT:    ret void
@@ -138,7 +138,7 @@ define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
 define amdgpu_kernel void @get_work_group_size_z(ptr addrspace(1) %out) #0 {
 ; GCN-LABEL: @get_work_group_size_z(
 ; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-; GCN-NEXT:    [[GEP_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
+; GCN-NEXT:    [[GEP_Z:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
 ; GCN-NEXT:    [[GROUP_SIZE_Z:%.*]] = load i16, ptr addrspace(4) [[GEP_Z]], align 4
 ; GCN-NEXT:    store i16 [[GROUP_SIZE_Z]], ptr addrspace(1) [[OUT:%.*]], align 2
 ; GCN-NEXT:    ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index 8c584a1890c9df..98b701ab7f9d96 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -339,7 +339,7 @@ define amdgpu_kernel void @all_local_size(ptr addrspace(1) nocapture readnone %o
 ; TODO: Should be able to handle this, but not much reason to.
 ; CHECK-LABEL: @partial_load_group_size_x(
 ; CHECK-NEXT: %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
+; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds nuw i8, ptr addrspace(4) %dispatch.ptr, i64 4
 ; CHECK-NEXT: %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 4
 ; CHECK-NEXT: store i8 %group.size.x.lo, ptr addrspace(1) %out, align 1
 define amdgpu_kernel void @partial_load_group_size_x(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
@@ -352,7 +352,7 @@ define amdgpu_kernel void @partial_load_group_size_x(ptr addrspace(1) %out) #0 !
 
 ; CHECK-LABEL: @partial_load_group_size_x_explicit_callsite_align(
 ; CHECK-NEXT: %dispatch.ptr = tail call align 2 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
+; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds nuw i8, ptr addrspace(4) %dispatch.ptr, i64 4
 ; CHECK-NEXT: %group.size.x.lo = load i8, ptr addrspace(4) %gep.group.size.x, align 2
 ; CHECK-NEXT: store i8 %group.size.x.lo, ptr addrspace(1) %out, align 1
 define amdgpu_kernel void @partial_load_group_size_x_explicit_callsite_align(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
@@ -394,7 +394,7 @@ define amdgpu_kernel void @load_group_size_x_y_multiple_dispatch_ptr(ptr addrspa
 
 ; CHECK-LABEL: @use_local_size_x_uniform_work_group_size(
 ; CHECK-NEXT: %dispatch.ptr = tail call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds i8, ptr addrspace(4) %dispatch.ptr, i64 4
+; CHECK-NEXT: %gep.group.size.x = getelementptr inbounds nuw i8, ptr addrspace(4) %dispatch.ptr, i64 4
 ; CHECK-NEXT: %group.size.x = load i16, ptr addrspace(4) %gep.group.size.x, align 4
 ; CHECK: %group.size.x.zext = zext i16 %group.size.x to i32
 ; CHECK: store i64 %zext, ptr addrspace(1) %out

diff  --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
index aec86ec343bdb5..0af8c95da8d8ba 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
@@ -368,11 +368,11 @@ define amdgpu_kernel void @vector_bitcast_from_alloca_array(ptr addrspace(1) %ou
 ; OPT-LABEL: @vector_bitcast_to_array_from_alloca_array(
 ; OPT-NOT:   alloca
 ; OPT-NEXT: store i32 1, ptr addrspace(1) %out, align 4
-; OPT-NEXT: %out.repack1 = getelementptr inbounds i8, ptr addrspace(1) %out, i64 4
+; OPT-NEXT: %out.repack1 = getelementptr inbounds nuw i8, ptr addrspace(1) %out, i64 4
 ; OPT-NEXT: store i32 2, ptr addrspace(1) %out.repack1, align 4
-; OPT-NEXT: %out.repack2 = getelementptr inbounds i8, ptr addrspace(1) %out, i64 8
+; OPT-NEXT: %out.repack2 = getelementptr inbounds nuw i8, ptr addrspace(1) %out, i64 8
 ; OPT-NEXT: store i32 3, ptr addrspace(1) %out.repack2, align 4
-; OPT-NEXT: %out.repack3 = getelementptr inbounds i8, ptr addrspace(1) %out, i64 12
+; OPT-NEXT: %out.repack3 = getelementptr inbounds nuw i8, ptr addrspace(1) %out, i64 12
 ; OPT-NEXT: store i32 4, ptr addrspace(1) %out.repack3, align 4
 
 ; GCN-LABEL: {{^}}vector_bitcast_to_array_from_alloca_array:
@@ -394,11 +394,11 @@ define amdgpu_kernel void @vector_bitcast_to_array_from_alloca_array(ptr addrspa
 ; OPT-LABEL: @vector_bitcast_to_struct_from_alloca_array(
 ; OPT-NOT:   alloca
 ; OPT-NEXT: store i32 1, ptr addrspace(1) %out, align 4
-; OPT-NEXT: %out.repack1 = getelementptr inbounds i8, ptr addrspace(1) %out, i64 4
+; OPT-NEXT: %out.repack1 = getelementptr inbounds nuw i8, ptr addrspace(1) %out, i64 4
 ; OPT-NEXT: store i32 2, ptr addrspace(1) %out.repack1, align 4
-; OPT-NEXT: %out.repack2 = getelementptr inbounds i8, ptr addrspace(1) %out, i64 8
+; OPT-NEXT: %out.repack2 = getelementptr inbounds nuw i8, ptr addrspace(1) %out, i64 8
 ; OPT-NEXT: store i32 3, ptr addrspace(1) %out.repack2, align 4
-; OPT-NEXT: %out.repack3 = getelementptr inbounds i8, ptr addrspace(1) %out, i64 12
+; OPT-NEXT: %out.repack3 = getelementptr inbounds nuw i8, ptr addrspace(1) %out, i64 12
 ; OPT-NEXT: store i32 4, ptr addrspace(1) %out.repack3, align 4
 
 ; GCN-LABEL: {{^}}vector_bitcast_to_struct_from_alloca_array:

diff  --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll
index f02d0a242dc992..acc448559c7fad 100644
--- a/llvm/test/Transforms/Coroutines/coro-async.ll
+++ b/llvm/test/Transforms/Coroutines/coro-async.ll
@@ -120,19 +120,19 @@ define void @my_async_function_pa(ptr %ctxt, ptr %task, ptr %actor) {
 ; CHECK-O0-LABEL: define swiftcc void @my_async_function(ptr swiftasync %async.ctxt, ptr %task, ptr %actor)
 ; CHECK-SAME: !dbg ![[SP1:[0-9]+]] {
 ; CHECK: coro.return:
-; CHECK:   [[FRAMEPTR:%.*]] = getelementptr inbounds i8, ptr %async.ctxt, i64 128
-; CHECK:   [[ACTOR_SPILL_ADDR:%.*]] = getelementptr inbounds i8, ptr %async.ctxt, i64 152
+; CHECK:   [[FRAMEPTR:%.*]] = getelementptr inbounds nuw i8, ptr %async.ctxt, i64 128
+; CHECK:   [[ACTOR_SPILL_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr %async.ctxt, i64 152
 ; CHECK:   store ptr %actor, ptr [[ACTOR_SPILL_ADDR]]
-; CHECK:   [[ADDR1:%.*]]  = getelementptr inbounds i8, ptr %async.ctxt, i64 144
+; CHECK:   [[ADDR1:%.*]]  = getelementptr inbounds nuw i8, ptr %async.ctxt, i64 144
 ; CHECK:   store ptr %async.ctxt, ptr [[ADDR1]]
-; CHECK:   [[ALLOCA_PRJ2:%.*]] = getelementptr inbounds i8, ptr %async.ctxt, i64 136
+; CHECK:   [[ALLOCA_PRJ2:%.*]] = getelementptr inbounds nuw i8, ptr %async.ctxt, i64 136
 ; CHECK:   store i64 0, ptr [[FRAMEPTR]]
 ; CHECK:   store i64 1, ptr [[ALLOCA_PRJ2]]
 ; CHECK:   tail call void @some_may_write(ptr nonnull [[FRAMEPTR]])
 ; CHECK:   [[CALLEE_CTXT:%.*]] = tail call ptr @llvm.coro.async.context.alloc(ptr %task, ptr nonnull @my_other_async_function_fp)
-; CHECK:   [[CALLEE_CTXT_SPILL:%.*]] = getelementptr inbounds i8, ptr %async.ctxt, i64 160
+; CHECK:   [[CALLEE_CTXT_SPILL:%.*]] = getelementptr inbounds nuw i8, ptr %async.ctxt, i64 160
 ; CHECK:   store ptr [[CALLEE_CTXT]], ptr [[CALLEE_CTXT_SPILL]]
-; CHECK:   [[TYPED_RETURN_TO_CALLER_ADDR:%.*]] = getelementptr inbounds i8, ptr [[CALLEE_CTXT]], i64 8
+; CHECK:   [[TYPED_RETURN_TO_CALLER_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLEE_CTXT]], i64 8
 ; CHECK:   store ptr @my_async_functionTQ0_, ptr [[TYPED_RETURN_TO_CALLER_ADDR]]
 ; CHECK:   store ptr %async.ctxt, ptr [[CALLEE_CTXT]]
 ; Make sure the spill is underaligned to the max context alignment (16).
@@ -148,16 +148,16 @@ define void @my_async_function_pa(ptr %ctxt, ptr %task, ptr %actor) {
 ; CHECK-SAME: !dbg ![[SP2:[0-9]+]] {
 ; CHECK: entryresume.0:
 ; CHECK:   [[CALLER_CONTEXT:%.*]] = load ptr, ptr %0
-; CHECK:   [[FRAME_PTR:%.*]] = getelementptr inbounds i8, ptr [[CALLER_CONTEXT]], i64 128
+; CHECK:   [[FRAME_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLER_CONTEXT]], i64 128
 ; CHECK-O0:   [[VECTOR_SPILL_ADDR:%.*]] = getelementptr inbounds %my_async_function.Frame, ptr {{.*}}, i32 0, i32 1
 ; CHECK-O0:   load <4 x double>, ptr [[VECTOR_SPILL_ADDR]], align 16
-; CHECK:   [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, ptr [[CALLER_CONTEXT]], i64 160
+; CHECK:   [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLER_CONTEXT]], i64 160
 ; CHECK:   [[CALLEE_CTXT_RELOAD:%.*]] = load ptr, ptr [[CALLEE_CTXT_SPILL_ADDR]]
-; CHECK:   [[ACTOR_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, ptr [[CALLER_CONTEXT]], i64 152
+; CHECK:   [[ACTOR_RELOAD_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLER_CONTEXT]], i64 152
 ; CHECK:   [[ACTOR_RELOAD:%.*]] = load ptr, ptr [[ACTOR_RELOAD_ADDR]]
-; CHECK:   [[ADDR1:%.*]] = getelementptr inbounds i8, ptr [[CALLER_CONTEXT]], i64 144
+; CHECK:   [[ADDR1:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLER_CONTEXT]], i64 144
 ; CHECK:   [[ASYNC_CTXT_RELOAD:%.*]] = load ptr, ptr [[ADDR1]]
-; CHECK:   [[ALLOCA_PRJ2:%.*]] = getelementptr inbounds i8, ptr [[CALLER_CONTEXT]], i64 136
+; CHECK:   [[ALLOCA_PRJ2:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLER_CONTEXT]], i64 136
 ; CHECK:   tail call void @llvm.coro.async.context.dealloc(ptr nonnull [[CALLEE_CTXT_RELOAD]])
 ; CHECK:   [[VAL1:%.*]] = load i64, ptr [[FRAME_PTR]]
 ; CHECK:   tail call void @some_user(i64 [[VAL1]])
@@ -232,7 +232,7 @@ entry:
 ; CHECK-SAME: #[[FRAMEPOINTER]]
 ; CHECK-SAME: !dbg ![[SP4:[0-9]+]]
 ; CHECK: [[CALLEE_CTXT:%.*]] = load ptr, ptr %2
-; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds i8, ptr [[CALLEE_CTXT]], i64 152
+; CHECK: [[CALLEE_CTXT_SPILL_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLEE_CTXT]], i64 152
 ; CHECK: store ptr @my_async_function2.resume.1,
 ; CHECK: [[CALLLE_CTXT_RELOAD:%.*]] = load ptr, ptr [[CALLEE_CTXT_SPILL_ADDR]]
 ; CHECK: tail call swiftcc void @asyncSuspend(ptr [[CALLEE_CTXT_RELOAD]]

diff  --git a/llvm/test/Transforms/Coroutines/coro-retcon-alloca-opaque-ptr.ll b/llvm/test/Transforms/Coroutines/coro-retcon-alloca-opaque-ptr.ll
index 2dabc72a510148..b23c5222a3debd 100644
--- a/llvm/test/Transforms/Coroutines/coro-retcon-alloca-opaque-ptr.ll
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-alloca-opaque-ptr.ll
@@ -8,7 +8,7 @@ declare {ptr, ptr, i32} @prototype_f(ptr, i1)
 define {ptr, ptr, i32} @f(ptr %buffer, i32 %n, { i32 } %dummy) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  coro.return:
-; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds i8, ptr [[BUFFER:%.*]], i64 8
+; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[BUFFER:%.*]], i64 8
 ; CHECK-NEXT:    store i32 [[N:%.*]], ptr [[N_VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr @allocate(i32 [[N]])
 ; CHECK-NEXT:    store ptr [[TMP0]], ptr [[BUFFER]], align 8

diff  --git a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
index 9b00d7929684fe..aeb959e5ce7118 100644
--- a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll
@@ -7,7 +7,7 @@ declare {ptr, ptr, i32} @prototype_f(ptr, i1)
 define {ptr, ptr, i32} @f(ptr %buffer, i32 %n) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  coro.return:
-; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds i8, ptr [[BUFFER:%.*]], i64 8
+; CHECK-NEXT:    [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[BUFFER:%.*]], i64 8
 ; CHECK-NEXT:    store i32 [[N:%.*]], ptr [[N_VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr @allocate(i32 [[N]])
 ; CHECK-NEXT:    store ptr [[TMP0]], ptr [[BUFFER]], align 8

diff  --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
index b2da9affcd500a..fa10ddefee00eb 100644
--- a/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-value.ll
@@ -159,7 +159,7 @@ declare void @print(i32)
 ; CHECK-NEXT:  PostSpill:
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr @allocate(i32 16)
 ; CHECK-NEXT:    store ptr [[TMP0]], ptr [[BUFFER:%.*]], align 8
-; CHECK-NEXT:    [[VAL_SPILL_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    [[VAL_SPILL_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 ; CHECK-NEXT:    store i32 [[VAL:%.*]], ptr [[VAL_SPILL_ADDR]], align 4
 ; CHECK-NEXT:    store ptr [[ARRAY:%.*]], ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[ARRAY]], align 4
@@ -180,7 +180,7 @@ declare void @print(i32)
 ; CHECK-NEXT:    store i32 0, ptr [[ARRAY_RELOAD]], align 4
 ; CHECK-NEXT:    br label [[COROEND]]
 ; CHECK:       CoroEnd:
-; CHECK-NEXT:    [[VAL_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
+; CHECK-NEXT:    [[VAL_RELOAD_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
 ; CHECK-NEXT:    [[VAL_RELOAD:%.*]] = load i32, ptr [[VAL_RELOAD_ADDR]], align 4
 ; CHECK-NEXT:    [[NEW_VAL:%.*]] = add i32 [[VAL_RELOAD]], 123
 ; CHECK-NEXT:    tail call void @deallocate(ptr [[TMP2]])
@@ -198,7 +198,7 @@ declare void @print(i32)
 ; CHECK-NEXT:    store i32 10, ptr [[ARRAY_RELOAD]], align 4
 ; CHECK-NEXT:    br label [[COROEND]]
 ; CHECK:       CoroEnd:
-; CHECK-NEXT:    [[VAL_RELOAD_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
+; CHECK-NEXT:    [[VAL_RELOAD_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
 ; CHECK-NEXT:    [[VAL_RELOAD:%.*]] = load i32, ptr [[VAL_RELOAD_ADDR]], align 4
 ; CHECK-NEXT:    [[NEW_VAL:%.*]] = add i32 [[VAL_RELOAD]], 123
 ; CHECK-NEXT:    tail call void @deallocate(ptr [[TMP2]])

diff  --git a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
index 4c10cb3805c7c3..907d7e588ffe0c 100644
--- a/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll
@@ -38,16 +38,16 @@ define i32 @main() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = tail call ptr @allocate(i32 12)
 ; CHECK-NEXT:    store i32 1, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
-; CHECK-NEXT:    store i32 1, ptr [[N_VAL3_SPILL_ADDR_I]], align 4, !noalias !0
-; CHECK-NEXT:    [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
-; CHECK-NEXT:    store i32 2, ptr [[INPUT_SPILL_ADDR_I]], align 4, !noalias !0
-; CHECK-NEXT:    [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
-; CHECK-NEXT:    [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
-; CHECK-NEXT:    store i32 3, ptr [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias !3
-; CHECK-NEXT:    store i32 4, ptr [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias !3
-; CHECK-NEXT:    tail call void @print(i32 7), !noalias !6
-; CHECK-NEXT:    tail call void @deallocate(ptr nonnull [[TMP0]]), !noalias !6
+; CHECK-NEXT:    [[N_VAL3_SPILL_ADDR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    store i32 1, ptr [[N_VAL3_SPILL_ADDR_I]], align 4, !noalias [[META0:![0-9]+]]
+; CHECK-NEXT:    [[INPUT_SPILL_ADDR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    store i32 2, ptr [[INPUT_SPILL_ADDR_I]], align 4, !noalias [[META0]]
+; CHECK-NEXT:    [[INPUT_RELOAD_ADDR13_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    [[N_VAL3_RELOAD_ADDR11_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    store i32 3, ptr [[N_VAL3_RELOAD_ADDR11_I]], align 4, !noalias [[META3:![0-9]+]]
+; CHECK-NEXT:    store i32 4, ptr [[INPUT_RELOAD_ADDR13_I]], align 4, !noalias [[META3]]
+; CHECK-NEXT:    tail call void @print(i32 7), !noalias [[META6:![0-9]+]]
+; CHECK-NEXT:    tail call void @deallocate(ptr nonnull [[TMP0]]), !noalias [[META6]]
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:

diff  --git a/llvm/test/Transforms/Coroutines/coro-swifterror.ll b/llvm/test/Transforms/Coroutines/coro-swifterror.ll
index de28ed56081682..899be4a010326d 100644
--- a/llvm/test/Transforms/Coroutines/coro-swifterror.ll
+++ b/llvm/test/Transforms/Coroutines/coro-swifterror.ll
@@ -50,7 +50,7 @@ define ptr @g(ptr %buffer, i32 %n) {
 ; CHECK-NEXT:    store ptr null, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    call void @maybeThrow(ptr nonnull swifterror [[TMP0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds i8, ptr [[BUFFER]], i32 4
+; CHECK-NEXT:    [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds nuw i8, ptr [[BUFFER]], i32 4
 ; CHECK-NEXT:    store ptr [[TMP1]], ptr [[DOTSPILL_ADDR]], align 4
 ; CHECK-NEXT:    call void @logError(ptr [[TMP1]])
 ; CHECK-NEXT:    ret ptr @g.resume.0

diff  --git a/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll b/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
index 27005f38f29f11..b8d570637963d0 100644
--- a/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
+++ b/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
@@ -10,7 +10,7 @@ define i32 @main() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[U:%.*]] = alloca [[STRUCT__1ANON:%.*]], align 8
 ; CHECK-NEXT:    store double 0x7FF0000000000000, ptr [[U]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[TMP6]], 2146435072
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP0]], 2146435072

diff  --git a/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll b/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
index c31f6d4b4a1fe5..870b71e8128266 100644
--- a/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
+++ b/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
@@ -13,7 +13,7 @@ define i32 @bar(i64 %key_token2) nounwind {
 ; CHECK-NEXT:    [[IOSPEC:%.*]] = alloca [[STRUCT_KEY:%.*]], align 8
 ; CHECK-NEXT:    [[RET:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 0, ptr [[IOSPEC]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IOSPEC]], i32 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[IOSPEC]], i32 4
 ; CHECK-NEXT:    store i32 0, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    store i64 [[KEY_TOKEN2:%.*]], ptr [[IOSPEC]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 (...) @foo(ptr nonnull byval([[STRUCT_KEY]]) align 4 [[IOSPEC]], ptr nonnull [[RET]]) #[[ATTR0:[0-9]+]]

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
index dacdd413013658..a93091a73a065a 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll
@@ -35,7 +35,7 @@ define void @simple_nonmatching() #0 {
 define ptr @simple_safe_instruction(ptr %p) #0 {
 ; CHECK-LABEL: define ptr @simple_safe_instruction(
 ; CHECK-SAME: ptr [[P:%.*]]) {
-; CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
 ; CHECK-NEXT:    call void @llvm.aarch64.dmb(i32 10)
 ; CHECK-NEXT:    ret ptr [[RES]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll
index f03b633c581d6e..9c2476578974ad 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll
@@ -146,7 +146,7 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP8]], <4 x float> [[TMP4:%.*]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i64 0
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5:%.*]], i64 4
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5:%.*]], i64 4
 ; CHECK-NEXT:    store float [[TMP10]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i64 1
 ; CHECK-NEXT:    store float [[TMP12]], ptr [[TMP11]], align 4

diff  --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
index 9b0387e51eead7..8bc9bb2dadb629 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
@@ -146,7 +146,7 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP8]], <4 x float> [[TMP4:%.*]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i64 0
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5:%.*]], i64 4
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5:%.*]], i64 4
 ; CHECK-NEXT:    store float [[TMP10]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i64 1
 ; CHECK-NEXT:    store float [[TMP12]], ptr [[TMP11]], align 4

diff  --git a/llvm/test/Transforms/InstCombine/array.ll b/llvm/test/Transforms/InstCombine/array.ll
index 4f4ae17bebc503..3edb47dda62cc2 100644
--- a/llvm/test/Transforms/InstCombine/array.ll
+++ b/llvm/test/Transforms/InstCombine/array.ll
@@ -97,7 +97,7 @@ define void @test_zext_missing_nneg(ptr %ptr, i32 %a, i32 %b) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[A]], 10
 ; CHECK-NEXT:    [[IDX:%.*]] = zext i32 [[ADD]] to i64
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[IDX]]
 ; CHECK-NEXT:    store i32 [[B]], ptr [[GEP]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -116,8 +116,8 @@ define ptr @gep_inbounds_add_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[A_NNEG]])
 ; CHECK-NEXT:    [[B_NNEG:%.*]] = icmp sgt i64 [[B]], -1
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[B_NNEG]])
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[A]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[A]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 [[B]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
   %a.nneg = icmp sgt i64 %a, -1
@@ -207,8 +207,8 @@ define ptr @gep_inbounds_sext_add_nonneg(ptr %ptr, i32 %a) {
 ; CHECK-NEXT:    [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[A_NNEG]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[A]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP1]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 40
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[TMP1]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 40
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
   %a.nneg = icmp sgt i32 %a, -1

diff  --git a/llvm/test/Transforms/InstCombine/assume-align.ll b/llvm/test/Transforms/InstCombine/assume-align.ll
index 2b8ca5d25fd1a8..47659ff8c84909 100644
--- a/llvm/test/Transforms/InstCombine/assume-align.ll
+++ b/llvm/test/Transforms/InstCombine/assume-align.ll
@@ -6,7 +6,7 @@ declare void @llvm.assume(i1 noundef)
 define void @f1(ptr %a) {
 ; CHECK-LABEL: @f1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 4
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[PTR]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[TMP0]], 3
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
@@ -50,7 +50,7 @@ define void @f2(ptr %a) {
 ; CHECK-LABEL: @f2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 32, i32 24) ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0

diff  --git a/llvm/test/Transforms/InstCombine/assume-loop-align.ll b/llvm/test/Transforms/InstCombine/assume-loop-align.ll
index e7eb18c61b6bb0..24fd343d1448e7 100644
--- a/llvm/test/Transforms/InstCombine/assume-loop-align.ll
+++ b/llvm/test/Transforms/InstCombine/assume-loop-align.ll
@@ -21,10 +21,10 @@ define void @foo(ptr %a, ptr %b) #0 {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32

diff  --git a/llvm/test/Transforms/InstCombine/assume-redundant.ll b/llvm/test/Transforms/InstCombine/assume-redundant.ll
index 63f8859371f3a3..32d803e35a1c30 100644
--- a/llvm/test/Transforms/InstCombine/assume-redundant.ll
+++ b/llvm/test/Transforms/InstCombine/assume-redundant.ll
@@ -1,7 +1,11 @@
-; RUN: opt -passes='require<domtree>,instcombine,require<domtree>' -S < %s | FileCheck %s
+; RUN: opt -passes='require<domtree>,instcombine<no-verify-fixpoint>,require<domtree>' -S < %s | FileCheck %s
 ; Note: The -loops above can be anything that requires the domtree, and is
 ; necessary to work around a pass-manager bug.
 
+; In _Z3fooR1s we can only infer nuw on the gep after the IV has been
+; simplified, which we can't do in one iteration. (Note that nowadays
+; LoopUnroll would pre-simplify the IV and avoid the issue.)
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 

diff  --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll
index a728c294628cad..52f0adf02a3965 100644
--- a/llvm/test/Transforms/InstCombine/assume.ll
+++ b/llvm/test/Transforms/InstCombine/assume.ll
@@ -774,7 +774,7 @@ exit:
 
 define void @canonicalize_assume(ptr %0) {
 ; DEFAULT-LABEL: @canonicalize_assume(
-; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 8
+; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 8
 ; DEFAULT-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP2]], i64 16) ]
 ; DEFAULT-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/InstCombine/call-cast-target.ll b/llvm/test/Transforms/InstCombine/call-cast-target.ll
index e5bb929ce92f0c..2cedc6c81d735f 100644
--- a/llvm/test/Transforms/InstCombine/call-cast-target.ll
+++ b/llvm/test/Transforms/InstCombine/call-cast-target.ll
@@ -114,7 +114,7 @@ define i1 @test5() {
 ; CHECK-LABEL: define i1 @test5() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca { i32, i32 }, align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i1 @fn5(i32 [[TMP2]], i32 [[TMP4]])
 ; CHECK-NEXT:    ret i1 [[TMP5]]

diff  --git a/llvm/test/Transforms/InstCombine/cast_phi.ll b/llvm/test/Transforms/InstCombine/cast_phi.ll
index 80b07139faf4bf..aafe5f57c4c724 100644
--- a/llvm/test/Transforms/InstCombine/cast_phi.ll
+++ b/llvm/test/Transforms/InstCombine/cast_phi.ll
@@ -9,16 +9,16 @@ define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
 ; CHECK-NEXT:    [[CALLB:%.*]] = alloca [258 x float], align 4
 ; CHECK-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[INUMSTEPS:%.*]] to float
 ; CHECK-NEXT:    [[CONV_I12:%.*]] = zext i32 [[TID:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLA]], i64 0, i64 [[CONV_I12]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw [258 x float], ptr [[CALLA]], i64 0, i64 [[CONV_I12]]
 ; CHECK-NEXT:    store float [[CONV_I]], ptr [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLB]], i64 0, i64 [[CONV_I12]]
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw [258 x float], ptr [[CALLB]], i64 0, i64 [[CONV_I12]]
 ; CHECK-NEXT:    store float [[CONV_I]], ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[TID]], 0
 ; CHECK-NEXT:    br i1 [[CMP7]], label [[DOTBB1:%.*]], label [[DOTBB2:%.*]]
 ; CHECK:       .bb1:
-; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[CALLA]], i64 1024
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLA]], i64 1024
 ; CHECK-NEXT:    store float [[CONV_I]], ptr [[ARRAYIDX10]], align 4
-; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[CALLB]], i64 1024
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[CALLB]], i64 1024
 ; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX11]], align 4
 ; CHECK-NEXT:    br label [[DOTBB2]]
 ; CHECK:       .bb2:

diff  --git a/llvm/test/Transforms/InstCombine/cast_ptr.ll b/llvm/test/Transforms/InstCombine/cast_ptr.ll
index db576b9679b14f..40ba21b24435bd 100644
--- a/llvm/test/Transforms/InstCombine/cast_ptr.ll
+++ b/llvm/test/Transforms/InstCombine/cast_ptr.ll
@@ -407,7 +407,7 @@ define i32 @ptr_add_in_int_extra_use1(i32 %x) {
 ; CHECK-LABEL: @ptr_add_in_int_extra_use1(
 ; CHECK-NEXT:    [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr
 ; CHECK-NEXT:    call void @use_ptr(ptr [[PTR]])
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 4096
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i32 4096
 ; CHECK-NEXT:    [[R:%.*]] = ptrtoint ptr [[P2]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -421,7 +421,7 @@ define i32 @ptr_add_in_int_extra_use1(i32 %x) {
 define i32 @ptr_add_in_int_extra_use2(i32 %x) {
 ; CHECK-LABEL: @ptr_add_in_int_extra_use2(
 ; CHECK-NEXT:    [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 4096
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i32 4096
 ; CHECK-NEXT:    call void @use_ptr(ptr nonnull [[P2]])
 ; CHECK-NEXT:    [[R:%.*]] = ptrtoint ptr [[P2]] to i32
 ; CHECK-NEXT:    ret i32 [[R]]

diff  --git a/llvm/test/Transforms/InstCombine/catchswitch-phi.ll b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
index cb87ee67a45187..720f5258a6346a 100644
--- a/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
+++ b/llvm/test/Transforms/InstCombine/catchswitch-phi.ll
@@ -22,11 +22,11 @@ define void @test0(i1 %c1) personality ptr @__gxx_wasm_personality_v0 {
 ; CHECK-NEXT:    [[TMP0:%.*]] = alloca [[STRUCT_BLAM:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 4
 ; CHECK-NEXT:    invoke void @foo()
 ; CHECK-NEXT:            to label [[BB3:%.*]] unwind label [[BB4:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 4
 ; CHECK-NEXT:    invoke void @foo()
 ; CHECK-NEXT:            to label [[BB3]] unwind label [[BB4]]
 ; CHECK:       bb3:
@@ -120,10 +120,10 @@ define void @test1() personality ptr @__gxx_wasm_personality_v0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = catchswitch within none [label %catch.start1] unwind to caller
 ; CHECK:       catch.start1:
 ; CHECK-NEXT:    [[TMP3:%.*]] = catchpad within [[TMP2]] [ptr null]
-; CHECK-NEXT:    [[TOBOOL1_NOT:%.*]] = trunc i8 [[AP_2]] to i1
-; CHECK-NEXT:    br i1 [[TOBOOL1_NOT]], label [[IF_END1:%.*]], label [[IF_THEN1:%.*]]
+; CHECK-NEXT:    [[TOBOOL1:%.*]] = trunc i8 [[AP_2]] to i1
+; CHECK-NEXT:    br i1 [[TOBOOL1]], label [[IF_THEN1:%.*]], label [[IF_END1:%.*]]
 ; CHECK:       if.then1:
-; CHECK-NEXT:    br label [[IF_THEN1]]
+; CHECK-NEXT:    br label [[IF_END1]]
 ; CHECK:       if.end1:
 ; CHECK-NEXT:    catchret from [[TMP3]] to label [[TRY_CONT]]
 ; CHECK:       try.cont:

diff  --git a/llvm/test/Transforms/InstCombine/compare-alloca.ll b/llvm/test/Transforms/InstCombine/compare-alloca.ll
index c2639d39bc6566..e6342520c6fa9a 100644
--- a/llvm/test/Transforms/InstCombine/compare-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/compare-alloca.ll
@@ -75,7 +75,7 @@ define i1 @alloca_argument_compare_escaped_through_store(ptr %arg, ptr %ptr) {
 ; CHECK-LABEL: @alloca_argument_compare_escaped_through_store(
 ; CHECK-NEXT:    [[ALLOC:%.*]] = alloca i64, align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[ALLOC]], [[ARG:%.*]]
-; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds i8, ptr [[ALLOC]], i32 8
+; CHECK-NEXT:    [[P:%.*]] = getelementptr inbounds nuw i8, ptr [[ALLOC]], i32 8
 ; CHECK-NEXT:    store ptr [[P]], ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/compare-unescaped.ll b/llvm/test/Transforms/InstCombine/compare-unescaped.ll
index 02eb464c814c81..c48c754e3941b9 100644
--- a/llvm/test/Transforms/InstCombine/compare-unescaped.ll
+++ b/llvm/test/Transforms/InstCombine/compare-unescaped.ll
@@ -79,7 +79,7 @@ declare void @escape(ptr)
 define i1 @compare_and_call_after() {
 ; CHECK-LABEL: @compare_and_call_after(
 ; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
-; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull !0
+; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], [[LGP]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[ESCAPE_CALL:%.*]], label [[JUST_RETURN:%.*]]
 ; CHECK:       escape_call:
@@ -151,7 +151,7 @@ define ptr @compare_ret_escape(ptr %c) {
 ; CHECK:       retst:
 ; CHECK-NEXT:    ret ptr [[M]]
 ; CHECK:       chk:
-; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull !0
+; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0]]
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[LGP]]
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[RETST]], label [[CHK2:%.*]]
 ; CHECK:       chk2:
@@ -382,8 +382,8 @@ define i1 @two_nonnull_mallocs_hidden() {
 ; CHECK-LABEL: @two_nonnull_mallocs_hidden(
 ; CHECK-NEXT:    [[M:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
 ; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[M]], i64 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[N]], i64 2
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds nuw i8, ptr [[N]], i64 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/dependent-ivs.ll b/llvm/test/Transforms/InstCombine/dependent-ivs.ll
index 218e619643dfbb..bfd01c6b689416 100644
--- a/llvm/test/Transforms/InstCombine/dependent-ivs.ll
+++ b/llvm/test/Transforms/InstCombine/dependent-ivs.ll
@@ -513,7 +513,7 @@ define void @ptr_iv_inbounds(ptr %base, i64 %end) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[IV_PTR:%.*]] = getelementptr inbounds i8, ptr [[BASE]], i64 [[IV]]
+; CHECK-NEXT:    [[IV_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IV]]
 ; CHECK-NEXT:    call void @use.p0(ptr [[IV_PTR]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[END]]

diff  --git a/llvm/test/Transforms/InstCombine/extractvalue.ll b/llvm/test/Transforms/InstCombine/extractvalue.ll
index 0b7c6498390631..ba46dd33a59b0a 100644
--- a/llvm/test/Transforms/InstCombine/extractvalue.ll
+++ b/llvm/test/Transforms/InstCombine/extractvalue.ll
@@ -51,7 +51,7 @@ define i32 @foo(i32 %a, i32 %b) {
 ; The new load should be in the same spot as the old load.
 define i32 @extract2gep(ptr %pair, ptr %P) {
 ; CHECK-LABEL: @extract2gep(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PAIR:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[PAIR:%.*]], i64 4
 ; CHECK-NEXT:    [[E:%.*]] = load i32, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    store i32 0, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -82,7 +82,7 @@ end:
 ; to a 3-index inbounds gep + smaller load.
 define i16 @doubleextract2gep(ptr %arg) {
 ; CHECK-LABEL: @doubleextract2gep(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG:%.*]], i64 8
 ; CHECK-NEXT:    [[E2:%.*]] = load i16, ptr [[TMP1]], align 2
 ; CHECK-NEXT:    ret i16 [[E2]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 70cbb4306ec67a..5701689b68a520 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -1098,7 +1098,7 @@ define void @fmul_loop_invariant_fdiv(ptr %a, float %x) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[I_08]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[F:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[M:%.*]] = fdiv fast float [[F]], [[X:%.*]]
 ; CHECK-NEXT:    store float [[M]], ptr [[ARRAYIDX]], align 4

diff  --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 79d3360377e506..434cd810296d8c 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -760,17 +760,17 @@ define i32 @fsh_load_rotate_12(ptr %data) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DATA:%.*]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 [[CONV]], 24
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 16
 ; CHECK-NEXT:    [[OR:%.*]] = or disjoint i32 [[SHL3]], [[SHL]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
 ; CHECK-NEXT:    [[CONV5:%.*]] = zext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 8
 ; CHECK-NEXT:    [[OR7:%.*]] = or disjoint i32 [[OR]], [[SHL6]]
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1
 ; CHECK-NEXT:    [[CONV9:%.*]] = zext i8 [[TMP3]] to i32
 ; CHECK-NEXT:    [[OR10:%.*]] = or disjoint i32 [[OR7]], [[CONV9]]
@@ -808,17 +808,17 @@ define i32 @fsh_load_rotate_25(ptr %data) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[DATA:%.*]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 [[CONV]], 24
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[SHL3:%.*]] = shl nuw nsw i32 [[CONV2]], 16
 ; CHECK-NEXT:    [[OR:%.*]] = or disjoint i32 [[SHL3]], [[SHL]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
 ; CHECK-NEXT:    [[CONV5:%.*]] = zext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[SHL6:%.*]] = shl nuw nsw i32 [[CONV5]], 8
 ; CHECK-NEXT:    [[OR7:%.*]] = or disjoint i32 [[OR]], [[SHL6]]
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1
 ; CHECK-NEXT:    [[CONV9:%.*]] = zext i8 [[TMP3]] to i32
 ; CHECK-NEXT:    [[OR10:%.*]] = or disjoint i32 [[OR7]], [[CONV9]]

diff  --git a/llvm/test/Transforms/InstCombine/gep-addrspace.ll b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
index 789a31e5939466..0d1850b8fa020c 100644
--- a/llvm/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
@@ -45,7 +45,7 @@ define { i8, i8 } @inbounds_after_addrspacecast() {
 ; CHECK-NEXT:    [[T0:%.*]] = alloca i16, align 2
 ; CHECK-NEXT:    call void @escape_alloca(ptr nonnull [[T0]])
 ; CHECK-NEXT:    [[T1:%.*]] = addrspacecast ptr [[T0]] to ptr addrspace(11)
-; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds i8, ptr addrspace(11) [[T1]], i64 1
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(11) [[T1]], i64 1
 ; CHECK-NEXT:    [[T3:%.*]] = load i8, ptr addrspace(11) [[T2]], align 1
 ; CHECK-NEXT:    [[INSERT:%.*]] = insertvalue { i8, i8 } zeroinitializer, i8 [[T3]], 1
 ; CHECK-NEXT:    ret { i8, i8 } [[INSERT]]

diff  --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
index 849793c7a85aff..fa006ff992ba37 100644
--- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -13,7 +13,7 @@ declare void @use(i1)
 ; result = (((ptr) p + a) + b) + 1
 define ptr @basic(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @basic(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]]
 ; CHECK-NEXT:    ret ptr [[TMP3]]
@@ -50,9 +50,9 @@ define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
 ; result = ((ptr) p + a) + 3
 define ptr @merge(ptr %p, i64 %a) {
 ; CHECK-LABEL: @merge(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
 ; CHECK-NEXT:    ret ptr [[TMP3]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -67,12 +67,12 @@ define ptr @merge(ptr %p, i64 %a) {
 ; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
 define ptr @nested(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @nested(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 128
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 128
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 16
 ; CHECK-NEXT:    ret ptr [[TMP6]]
 ;
   %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
@@ -87,7 +87,7 @@ define ptr @nested(ptr %p, i64 %a, i64 %b) {
 ; It is valid to swap if the source operand of the first GEP has multiple uses.
 define ptr @multipleUses1(ptr %p) {
 ; CHECK-LABEL: @multipleUses1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
 ; CHECK-NEXT:    ret ptr [[TMP3]]
@@ -101,7 +101,7 @@ define ptr @multipleUses1(ptr %p) {
 ; It is valid to swap if the second GEP has multiple uses.
 define ptr @multipleUses2(ptr %p, i64 %a) {
 ; CHECK-LABEL: @multipleUses2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
 ; CHECK-NEXT:    call void @use(ptr nonnull [[TMP2]])
 ; CHECK-NEXT:    ret ptr [[TMP2]]
@@ -115,7 +115,7 @@ define ptr @multipleUses2(ptr %p, i64 %a) {
 ; Negative test. It is not valid to swap if the first GEP has multiple uses.
 define ptr @multipleUses3(ptr %p) {
 ; CHECK-LABEL: @multipleUses3(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
 ; CHECK-NEXT:    ret ptr [[TMP3]]

diff  --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
index 5f22a354570157..9b266b097057cf 100644
--- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
+++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
@@ -8,9 +8,9 @@ define i32 @foo(ptr nocapture readnone %match, i32 %cur_match, i32 %best_len, i3
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT2:%.*]] = zext i32 [[CUR_MATCH:%.*]] to i64
-; CHECK-NEXT:    [[ADD_PTR4:%.*]] = getelementptr inbounds i8, ptr [[WIN:%.*]], i64 [[IDX_EXT2]]
+; CHECK-NEXT:    [[ADD_PTR4:%.*]] = getelementptr inbounds nuw i8, ptr [[WIN:%.*]], i64 [[IDX_EXT2]]
 ; CHECK-NEXT:    [[IDX_EXT1:%.*]] = zext i32 [[BEST_LEN:%.*]] to i64
-; CHECK-NEXT:    [[ADD_PTR25:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR4]], i64 [[IDX_EXT1]]
+; CHECK-NEXT:    [[ADD_PTR25:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR4]], i64 [[IDX_EXT1]]
 ; CHECK-NEXT:    [[ADD_PTR36:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR25]], i64 -1
 ; CHECK-NEXT:    [[I1:%.*]] = load i32, ptr [[ADD_PTR36]], align 4
 ; CHECK-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[I1]], [[SCAN_END:%.*]]
@@ -19,8 +19,8 @@ define i32 @foo(ptr nocapture readnone %match, i32 %cur_match, i32 %best_len, i3
 ; CHECK-NEXT:    br label [[IF_THEN:%.*]]
 ; CHECK:       do.body:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = zext i32 [[I4:%.*]] to i64
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[WIN]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT1]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[WIN]], i64 [[IDX_EXT]]
+; CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 [[IDX_EXT1]]
 ; CHECK-NEXT:    [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR2]], i64 -1
 ; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr [[ADD_PTR3]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I3]], [[SCAN_END]]
@@ -30,7 +30,7 @@ define i32 @foo(ptr nocapture readnone %match, i32 %cur_match, i32 %best_len, i3
 ; CHECK-NEXT:    [[CHAIN_LENGTH_ADDR_08:%.*]] = phi i32 [ [[CHAIN_LENGTH:%.*]], [[IF_THEN_LR_PH]] ], [ [[DEC:%.*]], [[DO_BODY]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[CUR_MATCH_ADDR_09]], [[WMASK:%.*]]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[AND]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PREV:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[PREV:%.*]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[I4]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP4:%.*]] = icmp ugt i32 [[I4]], [[LIMIT:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP4]], label [[LAND_LHS_TRUE:%.*]], label [[DO_END]]
@@ -94,7 +94,7 @@ define void @PR37005(ptr %base, ptr %in) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[E1:%.*]] = getelementptr inbounds ptr, ptr [[IN:%.*]], i64 undef
-; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48
+; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds nuw i8, ptr [[E1]], i64 48
 ; CHECK-NEXT:    [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> <i64 0, i64 1>
 ; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14)
@@ -126,11 +126,11 @@ define void @PR37005_2(ptr %base, ptr %in) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[E1:%.*]] = getelementptr inbounds ptr, ptr [[IN:%.*]], i64 undef
-; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48
+; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds nuw i8, ptr [[E1]], i64 48
 ; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint ptr [[E2]] to i64
 ; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[PI1]], 14
 ; CHECK-NEXT:    [[SL1:%.*]] = and i64 [[TMP0]], 1125899906842496
-; CHECK-NEXT:    [[E5:%.*]] = getelementptr inbounds i8, ptr [[BASE:%.*]], i64 [[SL1]]
+; CHECK-NEXT:    [[E5:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[SL1]]
 ; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, ptr [[E5]], <2 x i64> <i64 80, i64 60>
 ; CHECK-NEXT:    call void @blackhole(<2 x ptr> [[E6]])
 ; CHECK-NEXT:    br label [[LOOP]]
@@ -156,7 +156,7 @@ define void @PR37005_3(<2 x ptr> %base, ptr %in) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[E1:%.*]] = getelementptr inbounds ptr, ptr [[IN:%.*]], i64 undef
-; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48
+; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds nuw i8, ptr [[E1]], i64 48
 ; CHECK-NEXT:    [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> <i64 0, i64 1>
 ; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14)
@@ -222,7 +222,7 @@ define float @gep_cross_loop(ptr %_arg_, ptr %_arg_3, float %_arg_8) {
 ; CHECK:       for.cond.i.i.i.preheader:
 ; CHECK-NEXT:    ret float [[SUM]]
 ; CHECK:       for.body.i:
-; CHECK-NEXT:    [[ARRAYIDX_I84_I:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[IDX]]
+; CHECK-NEXT:    [[ARRAYIDX_I84_I:%.*]] = getelementptr inbounds nuw float, ptr [[ADD_PTR]], i64 [[IDX]]
 ; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[ARRAYIDX_I84_I]], align 4
 ; CHECK-NEXT:    [[ADD_I]] = fadd fast float [[SUM]], [[I1]]
 ; CHECK-NEXT:    [[ADD11_I]] = add nuw nsw i64 [[IDX]], 1
@@ -259,7 +259,7 @@ define void @only_one_inbounds(ptr %ptr, i1 %c, i32 noundef %arg1, i32 noundef %
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[ARG1_EXT:%.*]] = zext i32 [[ARG1:%.*]] to i64
-; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[ARG1_EXT]]
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 [[ARG1_EXT]]
 ; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 [[ARG2_EXT]]
 ; CHECK-NEXT:    call void @use(ptr [[PTR3]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
@@ -287,7 +287,7 @@ define void @both_inbounds_one_neg(ptr %ptr, i1 %c, i32 noundef %arg) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[ARG_EXT:%.*]] = zext i32 [[ARG:%.*]] to i64
-; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[ARG_EXT]]
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 [[ARG_EXT]]
 ; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR2]], i64 -1
 ; CHECK-NEXT:    call void @use(ptr nonnull [[PTR3]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
@@ -314,8 +314,8 @@ define void @both_inbounds_pos(ptr %ptr, i1 %c, i32 noundef %arg) {
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[ARG_EXT:%.*]] = zext i32 [[ARG:%.*]] to i64
-; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[ARG_EXT]]
-; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR2]], i64 1
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 [[ARG_EXT]]
+; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR2]], i64 1
 ; CHECK-NEXT:    call void @use(ptr nonnull [[PTR3]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:

diff  --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
index edec54495c1ae6..46f42e16c83ce4 100644
--- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -12,7 +12,7 @@ target datalayout = "i24:8:8"
 ; result = (ptr) p + 3
 define ptr @mergeBasic(ptr %p) {
 ; CHECK-LABEL: @mergeBasic(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 12
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 12
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -24,7 +24,7 @@ define ptr @mergeBasic(ptr %p) {
 ; result = (ptr) p + 10
 define ptr @mergeDifferentTypes(ptr %p) {
 ; CHECK-LABEL: @mergeDifferentTypes(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 10
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i8, ptr %p, i64 2
@@ -36,7 +36,7 @@ define ptr @mergeDifferentTypes(ptr %p) {
 ; result = (ptr) p + 10
 define ptr @mergeReverse(ptr %p) {
 ; CHECK-LABEL: @mergeReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 10
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 1
@@ -58,7 +58,7 @@ define ptr @zeroSum(ptr %p) {
 ; result = (ptr) ((ptr) p + 1) + 17
 define ptr @array1(ptr %p) {
 ; CHECK-LABEL: @array1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 37
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 37
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds [20 x i8], ptr %p, i64 1, i64 1
@@ -70,7 +70,7 @@ define ptr @array1(ptr %p) {
 ; result = (ptr) p + 20
 define ptr @array2(ptr %p) {
 ; CHECK-LABEL: @array2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 20
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 20
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 2
@@ -82,7 +82,7 @@ define ptr @array2(ptr %p) {
 ; result = (ptr) p + 36
 define ptr @struct1(ptr %p) {
 ; CHECK-LABEL: @struct1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 36
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 36
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, ptr %p, i64 3
@@ -104,7 +104,7 @@ define ptr @struct2(ptr %p) {
 ; result = (ptr) &((struct.B) p)[0].member2.member0 + 7
 define ptr @structStruct(ptr %p) {
 ; CHECK-LABEL: @structStruct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 15
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 15
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds %struct.B, ptr %p, i64 0, i32 2, i32 0, i64 3
@@ -132,7 +132,7 @@ define ptr @appendIndex(ptr %p, i64 %i) {
 ; result = (ptr) p + 7
 define ptr @notDivisible(ptr %p) {
 ; CHECK-LABEL: @notDivisible(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 7
 ; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i24, ptr %p, i64 1
@@ -154,7 +154,7 @@ define ptr @partialConstant2(ptr %p, i64 %a) {
 
 define ptr @partialConstant3(ptr %p) {
 ; CHECK-LABEL: @partialConstant3(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nsw i64 [[TMP2]], 5
 ; CHECK-NEXT:    [[DOTOFFS:%.*]] = or disjoint i64 [[DOTIDX]], 16
@@ -186,7 +186,7 @@ define ptr @partialConstantMemberAliasing1(ptr %p, i64 %a) {
 define ptr @partialConstantMemberAliasing2(ptr %p, i64 %a) {
 ; CHECK-LABEL: @partialConstantMemberAliasing2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 1
 ; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.C, ptr %p, i64 %a, i32 1
@@ -199,7 +199,7 @@ define ptr @partialConstantMemberAliasing2(ptr %p, i64 %a) {
 define ptr @partialConstantMemberAliasing3(ptr %p, i64 %a) {
 ; CHECK-LABEL: @partialConstantMemberAliasing3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 [[A:%.*]], i32 2
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
 ; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.C, ptr %p, i64 %a, i32 2

diff  --git a/llvm/test/Transforms/InstCombine/gep-vector-indices.ll b/llvm/test/Transforms/InstCombine/gep-vector-indices.ll
index 9f33f4e9c206a0..660ce317a0f646 100644
--- a/llvm/test/Transforms/InstCombine/gep-vector-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-vector-indices.ll
@@ -14,7 +14,7 @@ define ptr @vector_splat_indices_v2i64_ext0(ptr %a) {
 
 define ptr @vector_splat_indices_nxv2i64_ext0(ptr %a) {
 ; CHECK-LABEL: @vector_splat_indices_nxv2i64_ext0(
-; CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; CHECK-NEXT:    ret ptr [[RES]]
 ;
   %tmp = insertelement <vscale x 2 x i64> poison, i64 4, i32 0

diff  --git a/llvm/test/Transforms/InstCombine/gepphigep.ll b/llvm/test/Transforms/InstCombine/gepphigep.ll
index 93266c67543963..fb4d61182d05c8 100644
--- a/llvm/test/Transforms/InstCombine/gepphigep.ll
+++ b/llvm/test/Transforms/InstCombine/gepphigep.ll
@@ -54,7 +54,7 @@ define i32 @test2(ptr %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
 ; CHECK-NEXT:    store i32 0, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP1]], i64 [[TMP19:%.*]]
 ; CHECK-NEXT:    store i32 0, ptr [[TMP20]], align 4
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 4
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP10]], i64 4
 ; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP25]]
 ;
@@ -86,12 +86,12 @@ define i32 @test3(ptr %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19, i64 %tmp20, i64 %tmp
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP19]], [[BB1]] ], [ [[TMP20]], [[BB2]] ]
 ; CHECK-NEXT:    [[TMP22:%.*]] = invoke i32 @foo1(i32 11)
-; CHECK-NEXT:    to label [[BB4:%.*]] unwind label [[BB5:%.*]]
+; CHECK-NEXT:            to label [[BB4:%.*]] unwind label [[BB5:%.*]]
 ; CHECK:       bb4:
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       bb5:
 ; CHECK-NEXT:    [[TMP27:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT:    catch ptr @_ZTIi
+; CHECK-NEXT:            catch ptr @_ZTIi
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT3]], ptr [[DM]], i64 [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [[STRUCT4:%.*]], ptr [[TMP1]], i64 [[TMP21:%.*]], i32 1, i32 1
 ; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP35]], align 4
@@ -189,7 +189,7 @@ define void @test5(ptr %idx, ptr %in) #0 {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[IN:%.*]], align 8
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 1
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
 ; CHECK-NEXT:    [[CMP23:%.*]] = icmp eq i8 [[TMP1]], 54
 ; CHECK-NEXT:    br i1 [[CMP23]], label [[WHILE_COND:%.*]], label [[IF_THEN_25:%.*]]
@@ -211,7 +211,7 @@ define void @test5(ptr %idx, ptr %in) #0 {
 ; CHECK:       while.cond.57:
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR34]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
-; CHECK-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds i16, ptr [[IDX:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds nuw i16, ptr [[IDX:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX61]], align 2
 ; CHECK-NEXT:    [[AND63:%.*]] = and i16 [[TMP5]], 2048
 ; CHECK-NEXT:    [[TOBOOL64:%.*]] = icmp eq i16 [[AND63]], 0

diff  --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 2ea65eb443cdf6..6851cbf6ce2d6f 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -686,7 +686,7 @@ define i32 @test28() nounwind  {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ORIENTATIONS:%.*]] = alloca [1 x [1 x %struct.x]], align 8
 ; CHECK-NEXT:    [[T3:%.*]] = call i32 @puts(ptr noundef nonnull dereferenceable(1) @.str) #[[ATTR0]]
-; CHECK-NEXT:    [[T45:%.*]] = getelementptr inbounds i8, ptr [[ORIENTATIONS]], i64 1
+; CHECK-NEXT:    [[T45:%.*]] = getelementptr inbounds nuw i8, ptr [[ORIENTATIONS]], i64 1
 ; CHECK-NEXT:    br label [[BB10:%.*]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[BB10]] ]
@@ -796,9 +796,9 @@ define ptr @test32(ptr %v) {
 ; CHECK-LABEL: @test32(
 ; CHECK-NEXT:    [[A:%.*]] = alloca [4 x ptr], align 16
 ; CHECK-NEXT:    store ptr null, ptr [[A]], align 8
-; CHECK-NEXT:    [[D:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[D:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 8
 ; CHECK-NEXT:    store ptr [[V:%.*]], ptr [[D]], align 8
-; CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
+; CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 16
 ; CHECK-NEXT:    [[G:%.*]] = load ptr, ptr [[F]], align 8
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
@@ -935,7 +935,7 @@ declare void @pr10322_f3(ptr)
 define void @pr10322_f1(ptr %foo) {
 ; CHECK-LABEL: @pr10322_f1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 16
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO:%.*]], i64 16
 ; CHECK-NEXT:    call void @pr10322_f2(ptr nonnull [[ARRAYIDX8]]) #[[ATTR0]]
 ; CHECK-NEXT:    call void @pr10322_f3(ptr nonnull [[ARRAYIDX8]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret void
@@ -975,7 +975,7 @@ declare void @three_gep_h(ptr)
 
 define void @test39(ptr %arg, i8 %arg1) nounwind {
 ; CHECK-LABEL: @test39(
-; CHECK-NEXT:    [[T:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 16
+; CHECK-NEXT:    [[T:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG:%.*]], i64 16
 ; CHECK-NEXT:    [[T2:%.*]] = load ptr, ptr [[T]], align 8
 ; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, ptr [[T2]], i64 -8
 ; CHECK-NEXT:    store i8 [[ARG1:%.*]], ptr [[T4]], align 8
@@ -1517,7 +1517,7 @@ define ptr @const_gep_0xi8_global() {
 define ptr @const_gep_chain(ptr %p, i64 %a) {
 ; CHECK-LABEL: @const_gep_chain(
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[P4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 6
+; CHECK-NEXT:    [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 6
 ; CHECK-NEXT:    ret ptr [[P4]]
 ;
   %p1 = getelementptr inbounds i8, ptr %p, i64 %a

diff  --git a/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll
index 76f8c926e9bec9..1296dc6a33b5d1 100644
--- a/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll
@@ -7,7 +7,7 @@ declare i32 @test58_d(i64 )
 
 define i1 @test59(ptr %foo) {
 ; CHECK-LABEL: @test59(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i32 8
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO:%.*]], i32 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[GEP1]] to i40
 ; CHECK-NEXT:    [[USE:%.*]] = zext i40 [[TMP1]] to i64
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
@@ -23,7 +23,7 @@ define i1 @test59(ptr %foo) {
 
 define i1 @test59_as1(ptr addrspace(1) %foo) {
 ; CHECK-LABEL: @test59_as1(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[FOO:%.*]], i16 8
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[FOO:%.*]], i16 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP1]] to i16
 ; CHECK-NEXT:    [[USE:%.*]] = zext i16 [[TMP1]] to i64
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
@@ -146,8 +146,8 @@ define i1 @test61_as1(ptr addrspace(1) %foo, i16 %i, i16 %j) {
 ; Negative test: GEP inbounds may cross sign boundary.
 define i1 @test62(ptr %a) {
 ; CHECK-LABEL: @test62(
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 10
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i32 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt ptr [[ARRAYIDX1]], [[ARRAYIDX2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -159,8 +159,8 @@ define i1 @test62(ptr %a) {
 
 define i1 @test62_as1(ptr addrspace(1) %a) {
 ; CHECK-LABEL: @test62_as1(
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[A:%.*]], i16 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[A]], i16 10
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[A:%.*]], i16 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[A]], i16 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt ptr addrspace(1) [[ARRAYIDX1]], [[ARRAYIDX2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 1545d034b2ac31..5f2a35645e051b 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -254,7 +254,7 @@ declare i32 @test58_d(i64)
 
 define i1 @test59(ptr %foo) {
 ; CHECK-LABEL: @test59(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 8
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO:%.*]], i64 8
 ; CHECK-NEXT:    [[USE:%.*]] = ptrtoint ptr [[GEP1]] to i64
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
 ; CHECK-NEXT:    ret i1 true
@@ -269,7 +269,7 @@ define i1 @test59(ptr %foo) {
 
 define i1 @test59_as1(ptr addrspace(1) %foo) {
 ; CHECK-LABEL: @test59_as1(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[FOO:%.*]], i16 8
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[FOO:%.*]], i16 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP1]] to i16
 ; CHECK-NEXT:    [[USE:%.*]] = zext i16 [[TMP1]] to i64
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]])
@@ -453,7 +453,7 @@ define i1 @test60_extra_use(ptr %foo, i64 %i, i64 %j) {
 
 define i1 @test60_extra_use_const_operands_inbounds(ptr %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test60_extra_use_const_operands_inbounds(
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[FOO:%.*]], i64 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO:%.*]], i64 4
 ; CHECK-NEXT:    call void @use(ptr nonnull [[GEP1]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[J:%.*]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]

diff  --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index b266d3e77c4344..6e1486660b24d7 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -1238,8 +1238,8 @@ declare i32 @test58_d(i64)
 ; Negative test: GEP inbounds may cross sign boundary.
 define i1 @test62(ptr %a) {
 ; CHECK-LABEL: @test62(
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt ptr [[ARRAYIDX1]], [[ARRAYIDX2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -1251,8 +1251,8 @@ define i1 @test62(ptr %a) {
 
 define i1 @test62_as1(ptr addrspace(1) %a) {
 ; CHECK-LABEL: @test62_as1(
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[A:%.*]], i16 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[A]], i16 10
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[A:%.*]], i16 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[A]], i16 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt ptr addrspace(1) [[ARRAYIDX1]], [[ARRAYIDX2]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/inbounds-gep.ll b/llvm/test/Transforms/InstCombine/inbounds-gep.ll
index f2a647c135103d..467e5a82f89ccd 100644
--- a/llvm/test/Transforms/InstCombine/inbounds-gep.ll
+++ b/llvm/test/Transforms/InstCombine/inbounds-gep.ll
@@ -19,7 +19,7 @@ define void @call1() {
 define void @call2() {
 ; CHECK-LABEL: define void @call2() {
 ; CHECK-NEXT:    [[A:%.*]] = call dereferenceable(8) ptr @g()
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
 ; CHECK-NEXT:    call void @use(ptr nonnull [[B]])
 ; CHECK-NEXT:    ret void
 ;
@@ -45,7 +45,7 @@ define void @call3() {
 define void @alloca() {
 ; CHECK-LABEL: define void @alloca() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i64, align 8
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
 ; CHECK-NEXT:    call void @use(ptr nonnull [[B]])
 ; CHECK-NEXT:    ret void
 ;
@@ -70,7 +70,7 @@ define void @arg1(ptr %a) {
 define void @arg2(ptr dereferenceable(8) %a) {
 ; CHECK-LABEL: define void @arg2
 ; CHECK-SAME: (ptr dereferenceable(8) [[A:%.*]]) {
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
 ; CHECK-NEXT:    call void @use(ptr nonnull [[B]])
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll
index 1110bbc5403e49..bf1deadcf620f6 100644
--- a/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll
+++ b/llvm/test/Transforms/InstCombine/indexed-gep-compares.ll
@@ -1,5 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=instcombine -S  < %s | FileCheck %s
+; RUN: opt -passes='instcombine<no-verify-fixpoint>' -S  < %s | FileCheck %s
+
+; In many of these tests nuw can be inferred on the sunk GEP in the exit
+; block. However, the condition is only converted into the necessary form
+; after it has been sunk and revisited already.
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
 
@@ -131,7 +135,7 @@ define ptr @test3_no_inbounds1(ptr %A, i32 %Offset) {
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb:
 ; CHECK-NEXT:    [[RHS:%.*]] = phi ptr [ [[RHS_NEXT:%.*]], [[BB]] ], [ [[TMP]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LHS:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 400
+; CHECK-NEXT:    [[LHS:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 400
 ; CHECK-NEXT:    [[RHS_NEXT]] = getelementptr inbounds i8, ptr [[RHS]], i32 4
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult ptr [[LHS]], [[RHS]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[BB2:%.*]], label [[BB]]
@@ -160,7 +164,7 @@ define ptr @test3_no_inbounds2(ptr %A, i32 %Offset) {
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb:
 ; CHECK-NEXT:    [[RHS:%.*]] = phi ptr [ [[RHS_NEXT:%.*]], [[BB]] ], [ [[TMP]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LHS:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 400
+; CHECK-NEXT:    [[LHS:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 400
 ; CHECK-NEXT:    [[RHS_NEXT]] = getelementptr i8, ptr [[RHS]], i32 4
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult ptr [[LHS]], [[RHS]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[BB2:%.*]], label [[BB]]
@@ -376,7 +380,7 @@ define i1 @test8(ptr %in, i64 %offset) {
 ; CHECK-NEXT:    [[GEPI8:%.*]] = getelementptr inbounds i8, ptr [[CASTI8]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[LD]] to i32
 ; CHECK-NEXT:    [[PTRCAST:%.*]] = inttoptr i32 [[TMP2]] to ptr
-; CHECK-NEXT:    [[GEPI32:%.*]] = getelementptr inbounds i8, ptr [[PTRCAST]], i32 4
+; CHECK-NEXT:    [[GEPI32:%.*]] = getelementptr inbounds nuw i8, ptr [[PTRCAST]], i32 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEPI32]], [[GEPI8]]
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -393,7 +397,7 @@ entry:
 define void @test_zero_offset_cycle(ptr %arg) {
 ; CHECK-LABEL: @test_zero_offset_cycle(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i32 8
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG:%.*]], i32 8
 ; CHECK-NEXT:    [[GEP_INT:%.*]] = ptrtoint ptr [[GEP]] to i32
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:

diff  --git a/llvm/test/Transforms/InstCombine/intptr1.ll b/llvm/test/Transforms/InstCombine/intptr1.ll
index 2bf255341f4bbf..6758180a4b81af 100644
--- a/llvm/test/Transforms/InstCombine/intptr1.ll
+++ b/llvm/test/Transforms/InstCombine/intptr1.ll
@@ -68,7 +68,7 @@ define void @test1_neg(ptr %a, ptr readnone %a_end, ptr %b.i64) {
 ; CHECK-NEXT:    [[I1:%.*]] = load float, ptr [[A]], align 4
 ; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float [[I1]], 4.200000e+01
 ; CHECK-NEXT:    store float [[MUL_I]], ptr [[A_ADDR_03]], align 4
-; CHECK-NEXT:    [[ADD]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[ADD]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[A_ADDR_03]], i64 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult ptr [[INCDEC_PTR]], [[A_END]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]

diff  --git a/llvm/test/Transforms/InstCombine/intptr7.ll b/llvm/test/Transforms/InstCombine/intptr7.ll
index 18fdd208e99a02..3120f028bd59be 100644
--- a/llvm/test/Transforms/InstCombine/intptr7.ll
+++ b/llvm/test/Transforms/InstCombine/intptr7.ll
@@ -6,7 +6,7 @@ define void @matching_phi(i64 %a, ptr %b, i1 %cond) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 8
+; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 8
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1
@@ -48,7 +48,7 @@ define void @no_matching_phi(i64 %a, ptr %b, i1 %cond) {
 ; CHECK-LABEL: @no_matching_phi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADD_INT:%.*]] = add i64 [[A:%.*]], 1
-; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 8
+; CHECK-NEXT:    [[ADDB:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 8
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[B:%.*]], label [[A:%.*]]
 ; CHECK:       A:
 ; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[ADD_INT]] to ptr

diff  --git a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
index 83a34de768f2e1..61afa3ba9d7171 100644
--- a/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
+++ b/llvm/test/Transforms/InstCombine/load-bitcast-select.ll
@@ -16,8 +16,8 @@ define void @_Z3foov() {
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[I_0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x float], ptr @a, i64 0, i64 [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [1000 x float], ptr @b, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x float], ptr @a, i64 0, i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x float], ptr @b, i64 0, i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp fast olt float [[TMP1]], [[TMP2]]

diff  --git a/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll b/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
index 7826611ecc1657..644795f08ac222 100644
--- a/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
+++ b/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
@@ -22,7 +22,7 @@ define void @_Z4testPcl(ptr %out, i64 %size) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[I_0]], [[SIZE:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[OUT:%.*]], i64 [[I_0]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT:%.*]], i64 [[I_0]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[I_0]]
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[SIZE]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]

diff  --git a/llvm/test/Transforms/InstCombine/memccpy.ll b/llvm/test/Transforms/InstCombine/memccpy.ll
index 91a1e5cbc1fe5e..f9cc3eaac54e20 100644
--- a/llvm/test/Transforms/InstCombine/memccpy.ll
+++ b/llvm/test/Transforms/InstCombine/memccpy.ll
@@ -11,7 +11,7 @@ declare ptr @memccpy(ptr, ptr, i32, i64)
 define ptr @memccpy_to_memcpy(ptr %dst) {
 ; CHECK-LABEL: @memccpy_to_memcpy(
 ; CHECK-NEXT:    store i64 8245940763182785896, ptr [[DST:%.*]], align 1
-; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8
+; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 8
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 12) ; 114 is 'r'
@@ -21,7 +21,7 @@ define ptr @memccpy_to_memcpy(ptr %dst) {
 define ptr @memccpy_to_memcpy2(ptr %dst) {
 ; CHECK-LABEL: @memccpy_to_memcpy2(
 ; CHECK-NEXT:    store i64 8245940763182785896, ptr [[DST:%.*]], align 1
-; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8
+; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 8
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 8); ; 114 is 'r'
@@ -122,7 +122,7 @@ define ptr @memccpy_to_memcpy8(ptr %dst) {
 define ptr @memccpy_to_memcpy9(ptr %dst, i64 %n) {
 ; CHECK-LABEL: @memccpy_to_memcpy9(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(12) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(12) @StopCharAfterNulTerminator, i64 12, i1 false)
-; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 12
+; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 12
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = call ptr @memccpy(ptr %dst, ptr @StopCharAfterNulTerminator, i32 120, i64 15) ; 120 is 'x'
@@ -132,7 +132,7 @@ define ptr @memccpy_to_memcpy9(ptr %dst, i64 %n) {
 define ptr @memccpy_to_memcpy10(ptr %dst, i64 %n) {
 ; CHECK-LABEL: @memccpy_to_memcpy10(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @StringWithEOF, i64 11, i1 false)
-; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 11
+; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 11
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = call ptr @memccpy(ptr %dst, ptr @StringWithEOF, i32 255, i64 15)
@@ -142,7 +142,7 @@ define ptr @memccpy_to_memcpy10(ptr %dst, i64 %n) {
 define ptr @memccpy_to_memcpy11(ptr %dst, i64 %n) {
 ; CHECK-LABEL: @memccpy_to_memcpy11(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @StringWithEOF, i64 11, i1 false)
-; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 11
+; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 11
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = call ptr @memccpy(ptr %dst, ptr @StringWithEOF, i32 -1, i64 15)
@@ -152,7 +152,7 @@ define ptr @memccpy_to_memcpy11(ptr %dst, i64 %n) {
 define ptr @memccpy_to_memcpy12(ptr %dst, i64 %n) {
 ; CHECK-LABEL: @memccpy_to_memcpy12(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @StringWithEOF, i64 11, i1 false)
-; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 11
+; CHECK-NEXT:    [[CALL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 11
 ; CHECK-NEXT:    ret ptr [[CALL]]
 ;
   %call = call ptr @memccpy(ptr %dst, ptr @StringWithEOF, i32 1023, i64 15)

diff  --git a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll
index e58e357684e18c..b86066c2776e86 100644
--- a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll
@@ -59,7 +59,7 @@ define void @test5(ptr %dest) {
 define void @test6(ptr %dest) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[A:%.*]] = alloca [7 x i8], align 1
-; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 2
 ; CHECK-NEXT:    store i16 42, ptr [[P2]], align 2
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DEST:%.*]], ptr noundef nonnull align 1 dereferenceable(7) [[P2]], i64 7, i1 false)
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/InstCombine/mempcpy.ll b/llvm/test/Transforms/InstCombine/mempcpy.ll
index 82c34f8a864ce7..e277c1e14e24b4 100644
--- a/llvm/test/Transforms/InstCombine/mempcpy.ll
+++ b/llvm/test/Transforms/InstCombine/mempcpy.ll
@@ -34,7 +34,7 @@ define ptr @memcpy_small_const_n(ptr %d, ptr nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_small_const_n(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[S:%.*]], align 1
 ; CHECK-NEXT:    store i64 [[TMP1]], ptr [[D:%.*]], align 1
-; CHECK-NEXT:    [[R:%.*]] = getelementptr inbounds i8, ptr [[D]], i64 8
+; CHECK-NEXT:    [[R:%.*]] = getelementptr inbounds nuw i8, ptr [[D]], i64 8
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %r = tail call ptr @mempcpy(ptr %d, ptr %s, i64 8)
@@ -44,7 +44,7 @@ define ptr @memcpy_small_const_n(ptr %d, ptr nocapture readonly %s) {
 define ptr @memcpy_big_const_n(ptr %d, ptr nocapture readonly %s) {
 ; CHECK-LABEL: @memcpy_big_const_n(
 ; CHECK-NEXT:    tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(1024) [[D:%.*]], ptr noundef nonnull align 1 dereferenceable(1024) [[S:%.*]], i64 1024, i1 false)
-; CHECK-NEXT:    [[R:%.*]] = getelementptr inbounds i8, ptr [[D]], i64 1024
+; CHECK-NEXT:    [[R:%.*]] = getelementptr inbounds nuw i8, ptr [[D]], i64 1024
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
   %r = tail call ptr @mempcpy(ptr %d, ptr %s, i64 1024)

diff  --git a/llvm/test/Transforms/InstCombine/memset2.ll b/llvm/test/Transforms/InstCombine/memset2.ll
index be5faccca3c43b..04a19d6c2f3d10 100644
--- a/llvm/test/Transforms/InstCombine/memset2.ll
+++ b/llvm/test/Transforms/InstCombine/memset2.ll
@@ -9,7 +9,7 @@ define i32 @test(ptr addrspace(1) nocapture %moves) {
 ; CHECK-LABEL: define i32 @test
 ; CHECK-SAME: (ptr addrspace(1) nocapture [[MOVES:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[MOVES]], i64 26
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[MOVES]], i64 26
 ; CHECK-NEXT:    store i64 0, ptr addrspace(1) [[GEP]], align 1
 ; CHECK-NEXT:    ret i32 0
 ;

diff  --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 4e0355e61739a8..bac51c82f36dda 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -1,5 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s
+
+; In the indexed_compare test the comparison is only converted int a form
+; from which we can infer non-negativity after the GEP has already been sunk
+; and revisited.
 
 define ptr @bitcast_opaque_to_opaque(ptr %a) {
 ; CHECK-LABEL: @bitcast_opaque_to_opaque(
@@ -241,7 +245,7 @@ define ptr @geps_combinable_
diff erent_elem_type7(ptr %a, i64 %idx) {
 define ptr @geps_combinable_
diff erent_elem_type8(ptr %a, i64 %idx) {
 ; CHECK-LABEL: @geps_combinable_
diff erent_elem_type8(
 ; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds { { i32, i32 } }, ptr [[A:%.*]], i64 [[IDX:%.*]], i32 0, i32 1
-; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw i8, ptr [[A2]], i64 4
 ; CHECK-NEXT:    ret ptr [[A3]]
 ;
   %a2 = getelementptr inbounds { { i32, i32 } }, ptr %a, i64 %idx, i32 0, i32 1
@@ -292,7 +296,7 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 3
 ; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw i8, ptr [[A2]], i64 4
 ; CHECK-NEXT:    ret ptr [[A3]]
 ;
   %a2 = getelementptr inbounds <vscale x 2 x i32>, ptr %a, i64 1
@@ -305,7 +309,7 @@ define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 5
 ; CHECK-NEXT:    [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
+; CHECK-NEXT:    [[A3:%.*]] = getelementptr inbounds nuw i8, ptr [[A2]], i64 4
 ; CHECK-NEXT:    ret ptr [[A3]]
 ;
   %a2 = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr %a, i64 1
@@ -444,7 +448,7 @@ bb2:
 define ptr addrspace(1) @gep_of_addrspace_cast(ptr %ptr) {
 ; CHECK-LABEL: @gep_of_addrspace_cast(
 ; CHECK-NEXT:    [[CAST1:%.*]] = addrspacecast ptr [[PTR:%.*]] to ptr addrspace(1)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[CAST1]], i64 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[CAST1]], i64 4
 ; CHECK-NEXT:    ret ptr addrspace(1) [[GEP]]
 ;
   %cast1 = addrspacecast ptr %ptr to ptr addrspace(1)
@@ -549,7 +553,7 @@ define ptr @phi_of_gep_flags_1(i1 %c, ptr %p) {
 ; CHECK:       else:
 ; CHECK-NEXT:    br label [[JOIN]]
 ; CHECK:       join:
-; CHECK-NEXT:    [[PHI:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[PHI:%.*]] = getelementptr nusw nuw i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    ret ptr [[PHI]]
 ;
   br i1 %c, label %if, label %else
@@ -694,7 +698,7 @@ define ptr @select_of_gep(i1 %c, ptr %p) {
 define ptr @select_of_gep_flags_1(i1 %c, ptr %p) {
 ; CHECK-LABEL: @select_of_gep_flags_1(
 ; CHECK-NEXT:    [[S_V:%.*]] = select i1 [[C:%.*]], i64 4, i64 8
-; CHECK-NEXT:    [[S:%.*]] = getelementptr nusw i8, ptr [[P:%.*]], i64 [[S_V]]
+; CHECK-NEXT:    [[S:%.*]] = getelementptr nusw nuw i8, ptr [[P:%.*]], i64 [[S_V]]
 ; CHECK-NEXT:    ret ptr [[S]]
 ;
   %gep1 = getelementptr inbounds i32, ptr %p, i64 1

diff  --git a/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll b/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
index 998169ec2fe26f..66c14a66206d63 100644
--- a/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
+++ b/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll
@@ -19,7 +19,7 @@ define i32 @test_gep_and_bitcast(i1 %cond, i1 %cond2) {
 ; ALL:       bb2:
 ; ALL-NEXT:    br label [[EXIT]]
 ; ALL:       exit:
-; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES_PHI:%.*]] = load i32, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    store i32 1, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1
@@ -57,7 +57,7 @@ define i32 @test_gep_and_bitcast_arg(ptr %obj, i1 %cond, i1 %cond2) {
 ; ALL:       bb2:
 ; ALL-NEXT:    br label [[EXIT]]
 ; ALL:       exit:
-; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds i8, ptr [[OBJ:%.*]], i64 16
+; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ:%.*]], i64 16
 ; ALL-NEXT:    [[RES_PHI:%.*]] = load i32, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    store i32 1, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1
@@ -157,7 +157,7 @@ define i32 @test_gep_i32ptr(i1 %cond, i1 %cond2) {
 ; ALL:       bb2:
 ; ALL-NEXT:    br label [[EXIT]]
 ; ALL:       exit:
-; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 64
+; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 64
 ; ALL-NEXT:    [[RES_PHI:%.*]] = load i32, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    store i32 1, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1
@@ -196,7 +196,7 @@ define i32 @test_gep_and_bitcast_gep_base_ptr(i1 %cond, i1 %cond2) {
 ; ALL:       bb2:
 ; ALL-NEXT:    br label [[EXIT]]
 ; ALL:       exit:
-; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds i8, ptr [[OBJ0]], i64 32
+; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ0]], i64 32
 ; ALL-NEXT:    [[RES_PHI:%.*]] = load i32, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    store i32 1, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1
@@ -234,7 +234,7 @@ define i32 @test_gep_and_bitcast_same_bb(i1 %cond, i1 %cond2) {
 ; ALL:       bb2:
 ; ALL-NEXT:    br label [[EXIT]]
 ; ALL:       exit:
-; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES_PHI:%.*]] = load i32, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    store i32 1, ptr [[PTR_TYPED]], align 4
 ; ALL-NEXT:    [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1
@@ -264,11 +264,11 @@ define i32 @test_gep_and_bitcast_same_bb_and_extra_use(i1 %cond, i1 %cond2) {
 ; INSTCOMBINE-LABEL: @test_gep_and_bitcast_same_bb_and_extra_use(
 ; INSTCOMBINE-NEXT:  entry:
 ; INSTCOMBINE-NEXT:    [[OBJ:%.*]] = call ptr @get_ptr.i8()
-; INSTCOMBINE-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; INSTCOMBINE-NEXT:    [[PTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; INSTCOMBINE-NEXT:    call void @foo.i32(ptr nonnull [[PTR1]])
 ; INSTCOMBINE-NEXT:    br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]]
 ; INSTCOMBINE:       bb2:
-; INSTCOMBINE-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; INSTCOMBINE-NEXT:    [[PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; INSTCOMBINE-NEXT:    br label [[EXIT]]
 ; INSTCOMBINE:       exit:
 ; INSTCOMBINE-NEXT:    [[PTR_TYPED:%.*]] = phi ptr [ [[PTR1]], [[ENTRY:%.*]] ], [ [[PTR2]], [[BB2]] ]
@@ -280,7 +280,7 @@ define i32 @test_gep_and_bitcast_same_bb_and_extra_use(i1 %cond, i1 %cond2) {
 ; INSTCOMBINEGVN-LABEL: @test_gep_and_bitcast_same_bb_and_extra_use(
 ; INSTCOMBINEGVN-NEXT:  entry:
 ; INSTCOMBINEGVN-NEXT:    [[OBJ:%.*]] = call ptr @get_ptr.i8()
-; INSTCOMBINEGVN-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; INSTCOMBINEGVN-NEXT:    [[PTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; INSTCOMBINEGVN-NEXT:    call void @foo.i32(ptr nonnull [[PTR1]])
 ; INSTCOMBINEGVN-NEXT:    br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]]
 ; INSTCOMBINEGVN:       bb2:
@@ -322,7 +322,7 @@ define i8 @test_gep(i1 %cond, i1 %cond2) {
 ; ALL:       bb2:
 ; ALL-NEXT:    br label [[EXIT]]
 ; ALL:       exit:
-; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR_TYPED:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES_PHI:%.*]] = load i8, ptr [[PTR_TYPED]], align 1
 ; ALL-NEXT:    store i8 1, ptr [[PTR_TYPED]], align 1
 ; ALL-NEXT:    [[RES:%.*]] = select i1 [[COND2:%.*]], i8 [[RES_PHI]], i8 1
@@ -357,12 +357,12 @@ define i32 @test_extra_uses(i1 %cond, i1 %cond2) {
 ; ALL-NEXT:    [[OBJ:%.*]] = call ptr @get_ptr.i8()
 ; ALL-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; ALL:       bb1:
-; ALL-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES1:%.*]] = load i32, ptr [[PTR1]], align 4
 ; ALL-NEXT:    call void @foo.i32(ptr nonnull [[PTR1]])
 ; ALL-NEXT:    br label [[EXIT:%.*]]
 ; ALL:       bb2:
-; ALL-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES2:%.*]] = load i32, ptr [[PTR2]], align 4
 ; ALL-NEXT:    call void @foo.i32(ptr nonnull [[PTR2]])
 ; ALL-NEXT:    br label [[EXIT]]
@@ -451,7 +451,7 @@ define i32 @test_extra_uses_multiple_geps(i1 %cond, i1 %cond2) {
 ; ALL-NEXT:    [[OBJ:%.*]] = call ptr @get_ptr.i8()
 ; ALL-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; ALL:       bb1:
-; ALL-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES1:%.*]] = load i32, ptr [[PTR1]], align 4
 ; ALL-NEXT:    call void @foo.i32(ptr nonnull [[PTR1]])
 ; ALL-NEXT:    br label [[EXIT:%.*]]
@@ -499,12 +499,12 @@ define i8 @test_gep_extra_uses(i1 %cond, i1 %cond2) {
 ; ALL-NEXT:    [[OBJ:%.*]] = call ptr @get_ptr.i8()
 ; ALL-NEXT:    br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
 ; ALL:       bb1:
-; ALL-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES1:%.*]] = load i8, ptr [[PTR1]], align 1
 ; ALL-NEXT:    call void @foo.i8(ptr nonnull [[PTR1]])
 ; ALL-NEXT:    br label [[EXIT:%.*]]
 ; ALL:       bb2:
-; ALL-NEXT:    [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 16
+; ALL-NEXT:    [[PTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[OBJ]], i64 16
 ; ALL-NEXT:    [[RES2:%.*]] = load i8, ptr [[PTR2]], align 1
 ; ALL-NEXT:    call void @foo.i8(ptr nonnull [[PTR2]])
 ; ALL-NEXT:    br label [[EXIT]]

diff  --git a/llvm/test/Transforms/InstCombine/phi-timeout.ll b/llvm/test/Transforms/InstCombine/phi-timeout.ll
index f30750ef0455c3..bb3717de0ca27f 100644
--- a/llvm/test/Transforms/InstCombine/phi-timeout.ll
+++ b/llvm/test/Transforms/InstCombine/phi-timeout.ll
@@ -10,7 +10,7 @@ define void @timeout(ptr nocapture readonly %cinfo, ptr %ptr) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[CINFO:%.*]], i32 4
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[CINFO:%.*]], i32 4
 ; CHECK-NEXT:    [[L:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2
 ; CHECK-NEXT:    [[CMP17:%.*]] = icmp eq i16 [[L]], 0
 ; CHECK-NEXT:    [[EXTRACT_T1:%.*]] = trunc i16 [[L]] to i8

diff  --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index c3548590d7f57a..687cacfb442d22 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -2097,7 +2097,7 @@ define i1 @cmp_eq_phi_node_can_fold_1(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2129,12 +2129,12 @@ define i1 @cmp_eq_phi_node_can_fold_2(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 49
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[SUB_IS_ZERO1:%.*]], label [[JOIN]]
 ; CHECK:       sub_is_zero1:
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i8 [[TMP7]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2174,7 +2174,7 @@ define i1 @cmp_eq_phi_node_can_fold_3(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[JOIN:%.*]], label [[SUB_IS_ZERO:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2207,12 +2207,12 @@ define i1 @cmp_eq_phi_node_can_fold_4(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[JOIN:%.*]], label [[SUB_IS_ZERO:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 49
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[JOIN]], label [[SUB_IS_ZERO1:%.*]]
 ; CHECK:       sub_is_zero1:
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i8 [[TMP7]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2252,7 +2252,7 @@ define i1 @cmp_ne_phi_node_can_fold_1(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2284,12 +2284,12 @@ define i1 @cmp_ne_phi_node_can_fold_2(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 49
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[SUB_IS_ZERO1:%.*]], label [[JOIN]]
 ; CHECK:       sub_is_zero1:
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2329,7 +2329,7 @@ define i1 @cmp_ne_phi_node_can_fold_3(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[JOIN:%.*]], label [[SUB_IS_ZERO:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2361,12 +2361,12 @@ define i1 @cmp_ne_phi_node_can_fold_4(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[JOIN:%.*]], label [[SUB_IS_ZERO:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 49
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[JOIN]], label [[SUB_IS_ZERO1:%.*]]
 ; CHECK:       sub_is_zero1:
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2406,7 +2406,7 @@ define i1 @cmp_sgt_phi_node_can_fold_1(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[JOIN:%.*]], label [[SUB_IS_ZERO:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i8 [[TMP4]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2438,12 +2438,12 @@ define i1 @cmp_sgt_phi_node_can_fold_2(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[JOIN:%.*]], label [[SUB_IS_ZERO:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 49
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[JOIN]], label [[SUB_IS_ZERO1:%.*]]
 ; CHECK:       sub_is_zero1:
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 2
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2485,7 +2485,7 @@ define i1 @cmp_sgt_phi_node_cant_fold_1(ptr %C) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2520,14 +2520,14 @@ define i1 @cmp_sgt_phi_node_cant_fold_2(ptr %C) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nsw i32 [[TMP7]], -49
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[SUB_IS_ZERO1:%.*]], label [[JOIN]]
 ; CHECK:       sub_is_zero1:
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 2
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2596,7 +2596,7 @@ define i1 @cmp_slt_phi_node_can_fold_2(ptr %C) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 48
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[JOIN:%.*]], label [[SUB_IS_ZERO:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 49
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[JOIN]], label [[SUB_IS_ZERO1:%.*]]
@@ -2639,7 +2639,7 @@ define i1 @cmp_slt_phi_node_cant_fold_1(ptr %C) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
 ; CHECK-NEXT:    br label [[JOIN]]
@@ -2674,14 +2674,14 @@ define i1 @cmp_slt_phi_node_cant_fold_2(ptr %C) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[SUB_IS_ZERO:%.*]], label [[JOIN:%.*]]
 ; CHECK:       sub_is_zero:
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nsw i32 [[TMP7]], -49
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[SUB_IS_ZERO1:%.*]], label [[JOIN]]
 ; CHECK:       sub_is_zero1:
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 2
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
 ; CHECK-NEXT:    br label [[JOIN]]

diff  --git a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
index a1d10c274c4655..a29ed0e759c82b 100644
--- a/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/ptr-replace-alloca.ll
@@ -45,10 +45,10 @@ define i8 @volatile_load_keep_alloca(i1 %cond) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 4 dereferenceable(32) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ALLOCA]], i64 1
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[ALLOCA]], i64 1
 ; CHECK-NEXT:    br label [[SINK:%.*]]
 ; CHECK:       else:
-; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ALLOCA]], i64 2
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[ALLOCA]], i64 2
 ; CHECK-NEXT:    br label [[SINK]]
 ; CHECK:       sink:
 ; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
@@ -81,10 +81,10 @@ define i8 @no_memcpy_keep_alloca(i1 %cond) {
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1)
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ALLOCA]], i64 1
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[ALLOCA]], i64 1
 ; CHECK-NEXT:    br label [[SINK:%.*]]
 ; CHECK:       else:
-; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ALLOCA]], i64 2
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[ALLOCA]], i64 2
 ; CHECK-NEXT:    br label [[SINK]]
 ; CHECK:       sink:
 ; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
@@ -259,7 +259,7 @@ define i32 @addrspace_
diff _keep_alloca_extra_gep(i1 %cond, ptr %x) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 1 dereferenceable(32) [[A]], ptr addrspace(1) noundef align 16 dereferenceable(32) @g2, i64 32, i1 false)
 ; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
 ; CHECK:       if:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 4
 ; CHECK-NEXT:    br label [[JOIN]]
 ; CHECK:       join:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[GEP]], [[IF]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
@@ -430,7 +430,7 @@ entry:
 define i8 @select_
diff _addrspace_remove_alloca_asan(i1 %cond, ptr %p) sanitize_address {
 ; CHECK-LABEL: @select_
diff _addrspace_remove_alloca_asan(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP2:%.*]] = select i1 [[COND:%.*]], ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @g2, i64 4), ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @g2, i64 6)
+; CHECK-NEXT:    [[GEP2:%.*]] = select i1 [[COND:%.*]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @g2, i64 4), ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @g2, i64 6)
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr addrspace(1) [[GEP2]], align 1
 ; CHECK-NEXT:    ret i8 [[LOAD]]
 ;
@@ -466,8 +466,8 @@ define i8 @call_readonly_keep_alloca2() {
 ; CHECK-LABEL: @call_readonly_keep_alloca2(
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 1, addrspace(1)
 ; CHECK-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 1 dereferenceable(16) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(16) @g1, i64 16, i1 false)
-; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ALLOCA]], i64 16
-; CHECK-NEXT:    call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 1 dereferenceable(16) [[A1]], ptr addrspace(1) noundef align 16 dereferenceable(16) @g2, i64 16, i1 false)
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[ALLOCA]], i64 16
+; CHECK-NEXT:    call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 dereferenceable(16) [[A1]], ptr addrspace(1) noundef align 16 dereferenceable(16) @g2, i64 16, i1 false)
 ; CHECK-NEXT:    [[P:%.*]] = addrspacecast ptr addrspace(1) [[ALLOCA]] to ptr
 ; CHECK-NEXT:    [[V:%.*]] = call i8 @readonly_callee(ptr [[P]])
 ; CHECK-NEXT:    ret i8 [[V]]

diff  --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index 54d1417ff9fd98..736fe66f0419fa 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -411,7 +411,7 @@ define ptr @ptrmask_to_modified_gep_zero_argument() {
 define ptr @ptrmask_to_preserves_inbounds(ptr align 16 %p) {
 ; CHECK-LABEL: define ptr @ptrmask_to_preserves_inbounds
 ; CHECK-SAME: (ptr align 16 [[P:%.*]]) {
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 28
 ; CHECK-NEXT:    ret ptr [[GEP1]]
 ;
   %gep = getelementptr inbounds i8, ptr %p, i32 31

diff  --git a/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll b/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll
index 56580a8b68c681..3c10378d17220a 100644
--- a/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll
+++ b/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll
@@ -67,7 +67,7 @@ define double @test_nnan_flag_enabled(ptr %arr_d) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul nnan double [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -101,7 +101,7 @@ define double @test_ninf_flag_enabled(ptr %arr_d) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul ninf double [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -135,7 +135,7 @@ define double @test_nsz_flag_enabled(ptr %arr_d) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul nsz double [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -169,7 +169,7 @@ define double @test_phi_initalise_to_non_zero(ptr %arr_d) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -205,7 +205,7 @@ define double @test_multiple_phi_operands(ptr %arr_d, i1 %entry_cond) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY_2]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[ENTRY_2]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -244,7 +244,7 @@ define double @test_multiple_phi_operands_with_non_zero(ptr %arr_d, i1 %entry_co
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY_2]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[ENTRY_2]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -281,7 +281,7 @@ define i32 @test_int_phi_operands(ptr %arr_d) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL]] = mul nsw i32 [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -315,7 +315,7 @@ define i32 @test_int_phi_operands_initalise_to_non_zero(ptr %arr_d) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL]] = mul i32 [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -351,7 +351,7 @@ define i32 @test_multiple_int_phi_operands(ptr %arr_d, i1 %entry_cond) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY_2]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 0, [[ENTRY_2]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL]] = mul i32 [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
@@ -390,7 +390,7 @@ define i32 @test_multiple_int_phi_operands_initalise_to_non_zero(ptr %arr_d, i1
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY_2]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[F_PROD_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 1, [[ENTRY_2]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL]] = mul i32 [[F_PROD_01]], [[TMP0]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1

diff  --git a/llvm/test/Transforms/InstCombine/select-cmp-br.ll b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
index f1c62a5593f0f2..2a8c19ece4b70a 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp-br.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
@@ -12,7 +12,7 @@ define void @test1(ptr %arg) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[M:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 16
 ; CHECK-NEXT:    [[N:%.*]] = load ptr, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP5_NOT:%.*]] = icmp eq ptr [[M]], [[N]]
 ; CHECK-NEXT:    br i1 [[TMP5_NOT]], label [[BB8:%.*]], label [[BB10:%.*]]
@@ -22,7 +22,7 @@ define void @test1(ptr %arg) {
 ; CHECK-NEXT:    tail call void @bar(ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[M]], i64 72
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 72
 ; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB]]
@@ -54,7 +54,7 @@ define void @test2(ptr %arg) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[M:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 16
 ; CHECK-NEXT:    [[N:%.*]] = load ptr, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[M]], [[N]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[BB10:%.*]], label [[BB8:%.*]]
@@ -64,7 +64,7 @@ define void @test2(ptr %arg) {
 ; CHECK-NEXT:    tail call void @bar(ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[M]], i64 72
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 72
 ; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB]]
@@ -96,7 +96,7 @@ define void @test3(ptr %arg) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[M:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 16
 ; CHECK-NEXT:    [[N:%.*]] = load ptr, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[M]], [[N]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[BB8:%.*]], label [[BB10:%.*]]
@@ -106,7 +106,7 @@ define void @test3(ptr %arg) {
 ; CHECK-NEXT:    tail call void @bar(ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[M]], i64 72
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 72
 ; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB]]
@@ -138,7 +138,7 @@ define void @test4(ptr %arg) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[M:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG]], i64 16
 ; CHECK-NEXT:    [[N:%.*]] = load ptr, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq ptr [[M]], [[N]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[BB10:%.*]], label [[BB8:%.*]]
@@ -148,7 +148,7 @@ define void @test4(ptr %arg) {
 ; CHECK-NEXT:    tail call void @bar(ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[M]], i64 72
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 72
 ; CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 [[TMP4]](ptr nonnull [[ARG]])
 ; CHECK-NEXT:    br label [[BB]]

diff  --git a/llvm/test/Transforms/InstCombine/select-gep.ll b/llvm/test/Transforms/InstCombine/select-gep.ll
index 17304801b1b345..d3c357ea9751b6 100644
--- a/llvm/test/Transforms/InstCombine/select-gep.ll
+++ b/llvm/test/Transforms/InstCombine/select-gep.ll
@@ -47,7 +47,7 @@ define ptr @test1d(ptr %p, ptr %q) {
 ; CHECK-LABEL: @test1d(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt ptr [[P:%.*]], [[Q:%.*]]
 ; CHECK-NEXT:    [[SELECT_V:%.*]] = select i1 [[CMP]], ptr [[P]], ptr [[Q]]
-; CHECK-NEXT:    [[SELECT:%.*]] = getelementptr inbounds i8, ptr [[SELECT_V]], i64 16
+; CHECK-NEXT:    [[SELECT:%.*]] = getelementptr inbounds nuw i8, ptr [[SELECT_V]], i64 16
 ; CHECK-NEXT:    ret ptr [[SELECT]]
 ;
   %gep1 = getelementptr inbounds i32, ptr %p, i64 4
@@ -130,7 +130,7 @@ define ptr @test2c(ptr %p, i64 %x, i64 %y) {
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 0, i64 24
-; CHECK-NEXT:    [[SEL:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[SEL_IDX]]
+; CHECK-NEXT:    [[SEL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP1]], i64 [[SEL_IDX]]
 ; CHECK-NEXT:    ret ptr [[SEL]]
 ;
   %gep1 = getelementptr inbounds i32, ptr %p, i64 %x
@@ -146,7 +146,7 @@ define ptr @test2d(ptr %p, i64 %x, i64 %y) {
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[X:%.*]]
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SEL_IDX:%.*]] = select i1 [[ICMP]], i64 24, i64 0
-; CHECK-NEXT:    [[SEL:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[SEL_IDX]]
+; CHECK-NEXT:    [[SEL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP1]], i64 [[SEL_IDX]]
 ; CHECK-NEXT:    ret ptr [[SEL]]
 ;
   %gep1 = getelementptr inbounds i32, ptr %p, i64 %x

diff  --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll
index 1da6903ae46664..ccb96012ae3ccd 100644
--- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll
+++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll
@@ -187,7 +187,7 @@ define i32 @sink_gep1(i1 %c) {
 ; CHECK:       early_return:
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       use_block:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[VAR]], i64 4
 ; CHECK-NEXT:    [[VAR3:%.*]] = call i32 @unknown(ptr nonnull [[GEP]]) #[[ATTR1]]
 ; CHECK-NEXT:    ret i32 [[VAR3]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/snprintf-2.ll b/llvm/test/Transforms/InstCombine/snprintf-2.ll
index 0465457aacec75..5195b4271ab8c9 100644
--- a/llvm/test/Transforms/InstCombine/snprintf-2.ll
+++ b/llvm/test/Transforms/InstCombine/snprintf-2.ll
@@ -32,12 +32,12 @@ define void @fold_snprintf_fmt() {
 ; BE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 16), align 4
 ; BE-NEXT:    [[PD3:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 24), align 8
 ; BE-NEXT:    store i16 12594, ptr [[PD3]], align 1
-; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[PD3]], i64 2
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[PD3]], i64 2
 ; BE-NEXT:    store i8 0, ptr [[ENDPTR]], align 1
 ; BE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 12), align 4
 ; BE-NEXT:    [[PD2:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 16), align 8
 ; BE-NEXT:    store i8 49, ptr [[PD2]], align 1
-; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[PD2]], i64 1
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[PD2]], i64 1
 ; BE-NEXT:    store i8 0, ptr [[ENDPTR1]], align 1
 ; BE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 8), align 4
 ; BE-NEXT:    [[PD1:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 8), align 8
@@ -58,12 +58,12 @@ define void @fold_snprintf_fmt() {
 ; LE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 16), align 4
 ; LE-NEXT:    [[PD3:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 24), align 8
 ; LE-NEXT:    store i16 12849, ptr [[PD3]], align 1
-; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[PD3]], i64 2
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[PD3]], i64 2
 ; LE-NEXT:    store i8 0, ptr [[ENDPTR]], align 1
 ; LE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 12), align 4
 ; LE-NEXT:    [[PD2:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 16), align 8
 ; LE-NEXT:    store i8 49, ptr [[PD2]], align 1
-; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[PD2]], i64 1
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[PD2]], i64 1
 ; LE-NEXT:    store i8 0, ptr [[ENDPTR1]], align 1
 ; LE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 8), align 4
 ; LE-NEXT:    [[PD1:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 8), align 8

diff  --git a/llvm/test/Transforms/InstCombine/snprintf-3.ll b/llvm/test/Transforms/InstCombine/snprintf-3.ll
index 7c93580b4ea546..d1375954a15ada 100644
--- a/llvm/test/Transforms/InstCombine/snprintf-3.ll
+++ b/llvm/test/Transforms/InstCombine/snprintf-3.ll
@@ -33,12 +33,12 @@ define void @fold_snprintf_pcnt_s() {
 ; BE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 16), align 4
 ; BE-NEXT:    [[PD3:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 24), align 8
 ; BE-NEXT:    store i16 12594, ptr [[PD3]], align 1
-; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[PD3]], i64 2
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[PD3]], i64 2
 ; BE-NEXT:    store i8 0, ptr [[ENDPTR]], align 1
 ; BE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 12), align 4
 ; BE-NEXT:    [[PD2:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 16), align 8
 ; BE-NEXT:    store i8 49, ptr [[PD2]], align 1
-; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[PD2]], i64 1
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[PD2]], i64 1
 ; BE-NEXT:    store i8 0, ptr [[ENDPTR1]], align 1
 ; BE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 8), align 4
 ; BE-NEXT:    [[PD1:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 8), align 8
@@ -59,12 +59,12 @@ define void @fold_snprintf_pcnt_s() {
 ; LE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 16), align 4
 ; LE-NEXT:    [[PD3:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 24), align 8
 ; LE-NEXT:    store i16 12849, ptr [[PD3]], align 1
-; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[PD3]], i64 2
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[PD3]], i64 2
 ; LE-NEXT:    store i8 0, ptr [[ENDPTR]], align 1
 ; LE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 12), align 4
 ; LE-NEXT:    [[PD2:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 16), align 8
 ; LE-NEXT:    store i8 49, ptr [[PD2]], align 1
-; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[PD2]], i64 1
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[PD2]], i64 1
 ; LE-NEXT:    store i8 0, ptr [[ENDPTR1]], align 1
 ; LE-NEXT:    store i32 3, ptr getelementptr (i8, ptr @asiz, i64 8), align 4
 ; LE-NEXT:    [[PD1:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 8), align 8

diff  --git a/llvm/test/Transforms/InstCombine/snprintf-4.ll b/llvm/test/Transforms/InstCombine/snprintf-4.ll
index 7006838ae9b58a..88d9ea06765d01 100644
--- a/llvm/test/Transforms/InstCombine/snprintf-4.ll
+++ b/llvm/test/Transforms/InstCombine/snprintf-4.ll
@@ -21,17 +21,17 @@ define void @fold_snprintf_pcnt_c(i32 %c) {
 ; CHECK-LABEL: @fold_snprintf_pcnt_c(
 ; CHECK-NEXT:    [[PDIMAX:%.*]] = load ptr, ptr @adst, align 8
 ; CHECK-NEXT:    store i8 1, ptr [[PDIMAX]], align 1
-; CHECK-NEXT:    [[NUL:%.*]] = getelementptr inbounds i8, ptr [[PDIMAX]], i64 1
+; CHECK-NEXT:    [[NUL:%.*]] = getelementptr inbounds nuw i8, ptr [[PDIMAX]], i64 1
 ; CHECK-NEXT:    store i8 0, ptr [[NUL]], align 1
 ; CHECK-NEXT:    store i32 1, ptr @asiz, align 4
 ; CHECK-NEXT:    [[PD2:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 8), align 8
 ; CHECK-NEXT:    store i8 2, ptr [[PD2]], align 1
-; CHECK-NEXT:    [[NUL1:%.*]] = getelementptr inbounds i8, ptr [[PD2]], i64 1
+; CHECK-NEXT:    [[NUL1:%.*]] = getelementptr inbounds nuw i8, ptr [[PD2]], i64 1
 ; CHECK-NEXT:    store i8 0, ptr [[NUL1]], align 1
 ; CHECK-NEXT:    store i32 1, ptr getelementptr (i8, ptr @asiz, i64 4), align 4
 ; CHECK-NEXT:    [[PD2_0:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 16), align 8
 ; CHECK-NEXT:    store i8 0, ptr [[PD2_0]], align 1
-; CHECK-NEXT:    [[NUL2:%.*]] = getelementptr inbounds i8, ptr [[PD2_0]], i64 1
+; CHECK-NEXT:    [[NUL2:%.*]] = getelementptr inbounds nuw i8, ptr [[PD2_0]], i64 1
 ; CHECK-NEXT:    store i8 0, ptr [[NUL2]], align 1
 ; CHECK-NEXT:    store i32 1, ptr getelementptr (i8, ptr @asiz, i64 8), align 4
 ; CHECK-NEXT:    [[PD1:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 24), align 8
@@ -41,7 +41,7 @@ define void @fold_snprintf_pcnt_c(i32 %c) {
 ; CHECK-NEXT:    [[PD2_C:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 32), align 8
 ; CHECK-NEXT:    [[CHAR:%.*]] = trunc i32 [[C:%.*]] to i8
 ; CHECK-NEXT:    store i8 [[CHAR]], ptr [[PD2_C]], align 1
-; CHECK-NEXT:    [[NUL3:%.*]] = getelementptr inbounds i8, ptr [[PD2_C]], i64 1
+; CHECK-NEXT:    [[NUL3:%.*]] = getelementptr inbounds nuw i8, ptr [[PD2_C]], i64 1
 ; CHECK-NEXT:    store i8 0, ptr [[NUL3]], align 1
 ; CHECK-NEXT:    store i32 1, ptr getelementptr (i8, ptr @asiz, i64 16), align 4
 ; CHECK-NEXT:    [[PD1_C:%.*]] = load ptr, ptr getelementptr (i8, ptr @adst, i64 40), align 8

diff  --git a/llvm/test/Transforms/InstCombine/snprintf.ll b/llvm/test/Transforms/InstCombine/snprintf.ll
index 4e51040b398074..33285a97e0f33e 100644
--- a/llvm/test/Transforms/InstCombine/snprintf.ll
+++ b/llvm/test/Transforms/InstCombine/snprintf.ll
@@ -103,7 +103,7 @@ define i32 @test_char_small_size(ptr %buf) #0 {
 define i32 @test_char_ok_size(ptr %buf) #0 {
 ; CHECK-LABEL: @test_char_ok_size(
 ; CHECK-NEXT:    store i8 65, ptr [[BUF:%.*]], align 1
-; CHECK-NEXT:    [[NUL:%.*]] = getelementptr inbounds i8, ptr [[BUF]], i64 1
+; CHECK-NEXT:    [[NUL:%.*]] = getelementptr inbounds nuw i8, ptr [[BUF]], i64 1
 ; CHECK-NEXT:    store i8 0, ptr [[NUL]], align 1
 ; CHECK-NEXT:    ret i32 1
 ;

diff  --git a/llvm/test/Transforms/InstCombine/sprintf-1.ll b/llvm/test/Transforms/InstCombine/sprintf-1.ll
index b9354bb4b29908..0749015059415c 100644
--- a/llvm/test/Transforms/InstCombine/sprintf-1.ll
+++ b/llvm/test/Transforms/InstCombine/sprintf-1.ll
@@ -56,7 +56,7 @@ define void @test_simplify3(ptr %dst) {
 define void @test_simplify4(ptr %dst) {
 ; CHECK-LABEL: @test_simplify4(
 ; CHECK-NEXT:    store i8 104, ptr [[DST:%.*]], align 1
-; CHECK-NEXT:    [[NUL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 1
+; CHECK-NEXT:    [[NUL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i32 1
 ; CHECK-NEXT:    store i8 0, ptr [[NUL]], align 1
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
index ddfc1c086347f4..e9dbb4c6a32311 100644
--- a/llvm/test/Transforms/InstCombine/stpncpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
@@ -47,7 +47,7 @@ define void @fold_stpncpy_overlap(ptr %dst, i64 %n) {
 ; ANY-NEXT:    [[STXNCPY_CHAR0:%.*]] = load i8, ptr [[DST]], align 1
 ; ANY-NEXT:    [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
 ; ANY-NEXT:    [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
-; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
 ; ANY-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_SEL]])
 ; ANY-NEXT:    ret void
 ;
@@ -141,32 +141,32 @@ define void @fold_stpncpy_s1(ptr %dst) {
 ; BE-LABEL: @fold_stpncpy_s1(
 ; BE-NEXT:    call void @sink(ptr [[DST:%.*]], ptr [[DST]])
 ; BE-NEXT:    store i8 52, ptr [[DST]], align 1
-; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
 ; BE-NEXT:    store i16 13312, ptr [[DST]], align 1
-; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(3) @str.6, i64 3, i1 false)
-; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.7, i64 9, i1 false)
-; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; BE-NEXT:    ret void
 ;
 ; LE-LABEL: @fold_stpncpy_s1(
 ; LE-NEXT:    call void @sink(ptr [[DST:%.*]], ptr [[DST]])
 ; LE-NEXT:    store i8 52, ptr [[DST]], align 1
-; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
 ; LE-NEXT:    store i16 52, ptr [[DST]], align 1
-; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(3) @str.6, i64 3, i1 false)
-; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.7, i64 9, i1 false)
-; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; LE-NEXT:    ret void
 ;
@@ -204,38 +204,38 @@ define void @fold_stpncpy_s4(ptr %dst, i64 %n) {
 ; BE-LABEL: @fold_stpncpy_s4(
 ; BE-NEXT:    call void @sink(ptr [[DST:%.*]], ptr [[DST]])
 ; BE-NEXT:    store i8 49, ptr [[DST]], align 1
-; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
 ; BE-NEXT:    store i16 12594, ptr [[DST]], align 1
-; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @s4, i64 3, i1 false)
-; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
 ; BE-NEXT:    store i32 825373492, ptr [[DST]], align 1
-; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.8, i64 9, i1 false)
-; BE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
 ; BE-NEXT:    ret void
 ;
 ; LE-LABEL: @fold_stpncpy_s4(
 ; LE-NEXT:    call void @sink(ptr [[DST:%.*]], ptr [[DST]])
 ; LE-NEXT:    store i8 49, ptr [[DST]], align 1
-; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
 ; LE-NEXT:    store i16 12849, ptr [[DST]], align 1
-; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @s4, i64 3, i1 false)
-; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
 ; LE-NEXT:    store i32 875770417, ptr [[DST]], align 1
-; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.8, i64 9, i1 false)
-; LE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
 ; LE-NEXT:    ret void
 ;
@@ -312,44 +312,44 @@ define void @fold_stpncpy_a4(ptr %dst, i64 %n) {
 ; BE-LABEL: @fold_stpncpy_a4(
 ; BE-NEXT:    call void @sink(ptr [[DST:%.*]], ptr [[DST]])
 ; BE-NEXT:    store i8 49, ptr [[DST]], align 1
-; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
 ; BE-NEXT:    store i16 12594, ptr [[DST]], align 1
-; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 3, i1 false)
-; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
 ; BE-NEXT:    store i32 825373492, ptr [[DST]], align 1
-; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 5, i1 false)
-; BE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.9, i64 9, i1 false)
-; BE-NEXT:    [[ENDPTR4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT:    [[ENDPTR4:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR4]])
 ; BE-NEXT:    ret void
 ;
 ; LE-LABEL: @fold_stpncpy_a4(
 ; LE-NEXT:    call void @sink(ptr [[DST:%.*]], ptr [[DST]])
 ; LE-NEXT:    store i8 49, ptr [[DST]], align 1
-; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
 ; LE-NEXT:    store i16 12849, ptr [[DST]], align 1
-; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 3, i1 false)
-; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
 ; LE-NEXT:    store i32 875770417, ptr [[DST]], align 1
-; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 5, i1 false)
-; LE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.9, i64 9, i1 false)
-; LE-NEXT:    [[ENDPTR4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT:    [[ENDPTR4:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR4]])
 ; LE-NEXT:    ret void
 ;
@@ -398,7 +398,7 @@ define void @fold_stpncpy_s(ptr %dst, ptr %src) {
 ; ANY-NEXT:    store i8 [[STXNCPY_CHAR0]], ptr [[DST]], align 1
 ; ANY-NEXT:    [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
 ; ANY-NEXT:    [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
-; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
 ; ANY-NEXT:    call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_SEL]])
 ; ANY-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/InstCombine/str-int.ll b/llvm/test/Transforms/InstCombine/str-int.ll
index ee8d04d2f0e2a5..27b5a2596ff4ea 100644
--- a/llvm/test/Transforms/InstCombine/str-int.ll
+++ b/llvm/test/Transforms/InstCombine/str-int.ll
@@ -45,7 +45,7 @@ define i32 @strtol_hex() #0 {
 
 define i32 @strtol_endptr_not_null(ptr %pend) {
 ; CHECK-LABEL: @strtol_endptr_not_null(
-; CHECK-NEXT:    [[ENDP1:%.*]] = getelementptr inbounds i8, ptr [[PEND:%.*]], i64 8
+; CHECK-NEXT:    [[ENDP1:%.*]] = getelementptr inbounds nuw i8, ptr [[PEND:%.*]], i64 8
 ; CHECK-NEXT:    store ptr getelementptr inbounds (i8, ptr @.str, i64 2), ptr [[ENDP1]], align 8
 ; CHECK-NEXT:    ret i32 12
 ;

diff  --git a/llvm/test/Transforms/InstCombine/strlcpy-1.ll b/llvm/test/Transforms/InstCombine/strlcpy-1.ll
index 7ca6c1599f191f..3336e6b8d1bc2d 100644
--- a/llvm/test/Transforms/InstCombine/strlcpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/strlcpy-1.ll
@@ -109,15 +109,15 @@ define void @fold_strlcpy_s5(ptr %dst) {
 ; BE-NEXT:    store i8 0, ptr [[DST]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; BE-NEXT:    store i8 49, ptr [[DST]], align 1
-; BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    store i8 0, ptr [[TMP1]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; BE-NEXT:    store i16 12594, ptr [[DST]], align 1
-; BE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; BE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
 ; BE-NEXT:    store i8 0, ptr [[TMP2]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(3) @s4, i64 3, i1 false)
-; BE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; BE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
 ; BE-NEXT:    store i8 0, ptr [[TMP3]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @s4, i64 5, i1 false)
@@ -133,15 +133,15 @@ define void @fold_strlcpy_s5(ptr %dst) {
 ; LE-NEXT:    store i8 0, ptr [[DST]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; LE-NEXT:    store i8 49, ptr [[DST]], align 1
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    store i8 0, ptr [[TMP1]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; LE-NEXT:    store i16 12849, ptr [[DST]], align 1
-; LE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; LE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
 ; LE-NEXT:    store i8 0, ptr [[TMP2]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(3) @s4, i64 3, i1 false)
-; LE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; LE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
 ; LE-NEXT:    store i8 0, ptr [[TMP3]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 4)
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @s4, i64 5, i1 false)
@@ -283,19 +283,19 @@ define void @fold_strlcpy_a5(ptr %dst, i64 %n) {
 ; BE-NEXT:    store i8 0, ptr [[DST]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; BE-NEXT:    store i8 49, ptr [[DST]], align 1
-; BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; BE-NEXT:    store i8 0, ptr [[TMP1]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; BE-NEXT:    store i32 825373492, ptr [[DST]], align 1
-; BE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; BE-NEXT:    store i8 0, ptr [[TMP2]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a5, i64 5, i1 false)
-; BE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 5
+; BE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 5
 ; BE-NEXT:    store i8 0, ptr [[TMP3]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; BE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a5, i64 5, i1 false)
-; BE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 5
+; BE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 5
 ; BE-NEXT:    store i8 0, ptr [[TMP4]], align 1
 ; BE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; BE-NEXT:    ret void
@@ -305,19 +305,19 @@ define void @fold_strlcpy_a5(ptr %dst, i64 %n) {
 ; LE-NEXT:    store i8 0, ptr [[DST]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; LE-NEXT:    store i8 49, ptr [[DST]], align 1
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
 ; LE-NEXT:    store i8 0, ptr [[TMP1]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; LE-NEXT:    store i32 875770417, ptr [[DST]], align 1
-; LE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
 ; LE-NEXT:    store i8 0, ptr [[TMP2]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a5, i64 5, i1 false)
-; LE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 5
+; LE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 5
 ; LE-NEXT:    store i8 0, ptr [[TMP3]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; LE-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a5, i64 5, i1 false)
-; LE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 5
+; LE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 5
 ; LE-NEXT:    store i8 0, ptr [[TMP4]], align 1
 ; LE-NEXT:    call void @sink(ptr nonnull [[DST]], i64 5)
 ; LE-NEXT:    ret void

diff  --git a/llvm/test/Transforms/InstCombine/strlen-1.ll b/llvm/test/Transforms/InstCombine/strlen-1.ll
index 8def4dd9747f9c..facf4f7d0973f2 100644
--- a/llvm/test/Transforms/InstCombine/strlen-1.ll
+++ b/llvm/test/Transforms/InstCombine/strlen-1.ll
@@ -180,7 +180,7 @@ define i32 @test_no_simplify2_no_null_opt(i32 %x) #0 {
 define i32 @test_no_simplify3(i32 %x) {
 ; CHECK-LABEL: @test_no_simplify3(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 15
-; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds [13 x i8], ptr @null_hello_mid, i32 0, i32 [[AND]]
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds nuw [13 x i8], ptr @null_hello_mid, i32 0, i32 [[AND]]
 ; CHECK-NEXT:    [[HELLO_L:%.*]] = call i32 @strlen(ptr noundef nonnull dereferenceable(1) [[HELLO_P]])
 ; CHECK-NEXT:    ret i32 [[HELLO_L]]
 ;
@@ -193,8 +193,8 @@ define i32 @test_no_simplify3(i32 %x) {
 define i32 @test_no_simplify3_on_null_opt(i32 %x) #0 {
 ; CHECK-LABEL: @test_no_simplify3_on_null_opt(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 15
-; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds [13 x i8], ptr @null_hello_mid, i32 0, i32 [[AND]]
-; CHECK-NEXT:    [[HELLO_L:%.*]] = call i32 @strlen(ptr noundef [[HELLO_P]])
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds nuw [13 x i8], ptr @null_hello_mid, i32 0, i32 [[AND]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = call i32 @strlen(ptr noundef nonnull dereferenceable(1) [[HELLO_P]])
 ; CHECK-NEXT:    ret i32 [[HELLO_L]]
 ;
   %and = and i32 %x, 15

diff  --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll
index b52a062fc64041..5c547ab7def6d5 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa-2.ll
@@ -11,10 +11,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounw
 define void @test1(ptr %a1, ptr %a2) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i8, ptr [[A2:%.*]], i64 2
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[A2:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A2]], align 2, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    store i16 [[TMP0]], ptr [[A1:%.*]], align 2, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds i8, ptr [[A1]], i64 2
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw i8, ptr [[A1]], i64 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[B]], align 2, !tbaa [[TBAA6:![0-9]+]]
 ; CHECK-NEXT:    store i16 [[TMP1]], ptr [[B2]], align 2, !tbaa [[TBAA6]]
 ; CHECK-NEXT:    ret void
@@ -37,6 +37,8 @@ entry:
 !7 = !{!5, !5, i64 0, i64 2}
 !8 = !{!1, !4, i64 2, i64 2}
 
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.
 ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META4:![0-9]+]], i64 0, i64 2}
 ; CHECK: [[META1]] = !{[[META2:![0-9]+]], i64 4, !"_ZTS1T", [[META4]], i64 0, i64 2, [[META4]], i64 2, i64 2}

diff  --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index 4fc225454d3119..e89419d1f3838a 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -1125,7 +1125,7 @@ define i64 @test59(ptr %foo, i64 %i) {
 ; CHECK-LABEL: @test59(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[FOO:%.*]], i64 [[I:%.*]]
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4200
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[FOO]], i64 4200
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds nuw i8, ptr [[FOO]], i64 4200
 ; CHECK-NEXT:    store ptr [[GEP1]], ptr @dummy_global1, align 8
 ; CHECK-NEXT:    store ptr [[GEP2]], ptr @dummy_global2, align 8
 ; CHECK-NEXT:    ret i64 [[I]]

diff  --git a/llvm/test/Transforms/InstCombine/unpack-fca.ll b/llvm/test/Transforms/InstCombine/unpack-fca.ll
index b0cd3823dcca99..bd96611a7a6514 100644
--- a/llvm/test/Transforms/InstCombine/unpack-fca.ll
+++ b/llvm/test/Transforms/InstCombine/unpack-fca.ll
@@ -24,7 +24,7 @@ define void @storeA(ptr %a.ptr) {
 define void @storeB(ptr %b.ptr) {
 ; CHECK-LABEL: @storeB(
 ; CHECK-NEXT:    store ptr null, ptr [[B_PTR:%.*]], align 8
-; CHECK-NEXT:    [[B_PTR_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[B_PTR]], i64 8
+; CHECK-NEXT:    [[B_PTR_REPACK1:%.*]] = getelementptr inbounds nuw i8, ptr [[B_PTR]], i64 8
 ; CHECK-NEXT:    store i64 42, ptr [[B_PTR_REPACK1]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -76,14 +76,14 @@ define void @storeArrayOfB(ptr %ab.ptr, [2 x %B] %ab) {
 ; CHECK-NEXT:    [[AB_ELT:%.*]] = extractvalue [2 x %B] [[AB:%.*]], 0
 ; CHECK-NEXT:    [[AB_ELT_ELT:%.*]] = extractvalue [[B:%.*]] [[AB_ELT]], 0
 ; CHECK-NEXT:    store ptr [[AB_ELT_ELT]], ptr [[AB_PTR:%.*]], align 8
-; CHECK-NEXT:    [[AB_PTR_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[AB_PTR]], i64 8
+; CHECK-NEXT:    [[AB_PTR_REPACK3:%.*]] = getelementptr inbounds nuw i8, ptr [[AB_PTR]], i64 8
 ; CHECK-NEXT:    [[AB_ELT_ELT4:%.*]] = extractvalue [[B]] [[AB_ELT]], 1
 ; CHECK-NEXT:    store i64 [[AB_ELT_ELT4]], ptr [[AB_PTR_REPACK3]], align 8
-; CHECK-NEXT:    [[AB_PTR_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[AB_PTR]], i64 16
+; CHECK-NEXT:    [[AB_PTR_REPACK1:%.*]] = getelementptr inbounds nuw i8, ptr [[AB_PTR]], i64 16
 ; CHECK-NEXT:    [[AB_ELT2:%.*]] = extractvalue [2 x %B] [[AB]], 1
 ; CHECK-NEXT:    [[AB_ELT2_ELT:%.*]] = extractvalue [[B]] [[AB_ELT2]], 0
 ; CHECK-NEXT:    store ptr [[AB_ELT2_ELT]], ptr [[AB_PTR_REPACK1]], align 8
-; CHECK-NEXT:    [[AB_PTR_REPACK1_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[AB_PTR]], i64 24
+; CHECK-NEXT:    [[AB_PTR_REPACK1_REPACK5:%.*]] = getelementptr inbounds nuw i8, ptr [[AB_PTR]], i64 24
 ; CHECK-NEXT:    [[AB_ELT2_ELT6:%.*]] = extractvalue [[B]] [[AB_ELT2]], 1
 ; CHECK-NEXT:    store i64 [[AB_ELT2_ELT6]], ptr [[AB_PTR_REPACK1_REPACK5]], align 8
 ; CHECK-NEXT:    ret void
@@ -106,7 +106,7 @@ define %B @loadB(ptr %b.ptr) {
 ; CHECK-LABEL: @loadB(
 ; CHECK-NEXT:    [[DOTUNPACK:%.*]] = load ptr, ptr [[B_PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[B:%.*]] poison, ptr [[DOTUNPACK]], 0
-; CHECK-NEXT:    [[DOTELT1:%.*]] = getelementptr inbounds i8, ptr [[B_PTR]], i64 8
+; CHECK-NEXT:    [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[B_PTR]], i64 8
 ; CHECK-NEXT:    [[DOTUNPACK2:%.*]] = load i64, ptr [[DOTELT1]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[B]] [[TMP1]], i64 [[DOTUNPACK2]], 1
 ; CHECK-NEXT:    ret [[B]] [[TMP2]]
@@ -162,7 +162,7 @@ define { %A } @structOfA(ptr %sa.ptr) {
 define %B @structB(ptr %b.ptr) {
 ; CHECK-LABEL: @structB(
 ; CHECK-NEXT:    store ptr null, ptr [[B_PTR:%.*]], align 8
-; CHECK-NEXT:    [[B_PTR_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[B_PTR]], i64 8
+; CHECK-NEXT:    [[B_PTR_REPACK1:%.*]] = getelementptr inbounds nuw i8, ptr [[B_PTR]], i64 8
 ; CHECK-NEXT:    store i64 42, ptr [[B_PTR_REPACK1]], align 8
 ; CHECK-NEXT:    ret [[B:%.*]] { ptr null, i64 42 }
 ;
@@ -175,14 +175,14 @@ define [2 x %B] @loadArrayOfB(ptr %ab.ptr) {
 ; CHECK-LABEL: @loadArrayOfB(
 ; CHECK-NEXT:    [[DOTUNPACK_UNPACK:%.*]] = load ptr, ptr [[AB_PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[B:%.*]] poison, ptr [[DOTUNPACK_UNPACK]], 0
-; CHECK-NEXT:    [[DOTUNPACK_ELT3:%.*]] = getelementptr inbounds i8, ptr [[AB_PTR]], i64 8
+; CHECK-NEXT:    [[DOTUNPACK_ELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[AB_PTR]], i64 8
 ; CHECK-NEXT:    [[DOTUNPACK_UNPACK4:%.*]] = load i64, ptr [[DOTUNPACK_ELT3]], align 8
 ; CHECK-NEXT:    [[DOTUNPACK5:%.*]] = insertvalue [[B]] [[TMP1]], i64 [[DOTUNPACK_UNPACK4]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [2 x %B] poison, [[B]] [[DOTUNPACK5]], 0
-; CHECK-NEXT:    [[DOTELT1:%.*]] = getelementptr inbounds i8, ptr [[AB_PTR]], i64 16
+; CHECK-NEXT:    [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[AB_PTR]], i64 16
 ; CHECK-NEXT:    [[DOTUNPACK2_UNPACK:%.*]] = load ptr, ptr [[DOTELT1]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue [[B]] poison, ptr [[DOTUNPACK2_UNPACK]], 0
-; CHECK-NEXT:    [[DOTUNPACK2_ELT6:%.*]] = getelementptr inbounds i8, ptr [[AB_PTR]], i64 24
+; CHECK-NEXT:    [[DOTUNPACK2_ELT6:%.*]] = getelementptr inbounds nuw i8, ptr [[AB_PTR]], i64 24
 ; CHECK-NEXT:    [[DOTUNPACK2_UNPACK7:%.*]] = load i64, ptr [[DOTUNPACK2_ELT6]], align 8
 ; CHECK-NEXT:    [[DOTUNPACK28:%.*]] = insertvalue [[B]] [[TMP3]], i64 [[DOTUNPACK2_UNPACK7]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertvalue [2 x %B] [[TMP2]], [[B]] [[DOTUNPACK28]], 1
@@ -207,7 +207,7 @@ define [2000 x %B] @loadLargeArrayOfB(ptr %ab.ptr) {
 ; Make sure that we do not increase alignment of packed struct element
 define i32 @packed_alignment(ptr dereferenceable(9) %s) {
 ; CHECK-LABEL: @packed_alignment(
-; CHECK-NEXT:    [[TV_ELT1:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 5
+; CHECK-NEXT:    [[TV_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 5
 ; CHECK-NEXT:    [[TV_UNPACK2:%.*]] = load i32, ptr [[TV_ELT1]], align 1
 ; CHECK-NEXT:    ret i32 [[TV_UNPACK2]]
 ;
@@ -222,38 +222,38 @@ define i32 @packed_alignment(ptr dereferenceable(9) %s) {
 define void @check_alignment(ptr %u, ptr %v) {
 ; CHECK-LABEL: @check_alignment(
 ; CHECK-NEXT:    [[DOTUNPACK:%.*]] = load i8, ptr [[U:%.*]], align 8
-; CHECK-NEXT:    [[DOTELT1:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 1
+; CHECK-NEXT:    [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 1
 ; CHECK-NEXT:    [[DOTUNPACK2:%.*]] = load i8, ptr [[DOTELT1]], align 1
-; CHECK-NEXT:    [[DOTELT3:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 2
+; CHECK-NEXT:    [[DOTELT3:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 2
 ; CHECK-NEXT:    [[DOTUNPACK4:%.*]] = load i8, ptr [[DOTELT3]], align 2
-; CHECK-NEXT:    [[DOTELT5:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 3
+; CHECK-NEXT:    [[DOTELT5:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 3
 ; CHECK-NEXT:    [[DOTUNPACK6:%.*]] = load i8, ptr [[DOTELT5]], align 1
-; CHECK-NEXT:    [[DOTELT7:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 4
+; CHECK-NEXT:    [[DOTELT7:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 4
 ; CHECK-NEXT:    [[DOTUNPACK8:%.*]] = load i8, ptr [[DOTELT7]], align 4
-; CHECK-NEXT:    [[DOTELT9:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 5
+; CHECK-NEXT:    [[DOTELT9:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 5
 ; CHECK-NEXT:    [[DOTUNPACK10:%.*]] = load i8, ptr [[DOTELT9]], align 1
-; CHECK-NEXT:    [[DOTELT11:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 6
+; CHECK-NEXT:    [[DOTELT11:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 6
 ; CHECK-NEXT:    [[DOTUNPACK12:%.*]] = load i8, ptr [[DOTELT11]], align 2
-; CHECK-NEXT:    [[DOTELT13:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 7
+; CHECK-NEXT:    [[DOTELT13:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 7
 ; CHECK-NEXT:    [[DOTUNPACK14:%.*]] = load i8, ptr [[DOTELT13]], align 1
-; CHECK-NEXT:    [[DOTELT15:%.*]] = getelementptr inbounds i8, ptr [[U]], i64 8
+; CHECK-NEXT:    [[DOTELT15:%.*]] = getelementptr inbounds nuw i8, ptr [[U]], i64 8
 ; CHECK-NEXT:    [[DOTUNPACK16:%.*]] = load i64, ptr [[DOTELT15]], align 8
 ; CHECK-NEXT:    store i8 [[DOTUNPACK]], ptr [[V:%.*]], align 8
-; CHECK-NEXT:    [[V_REPACK17:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 1
+; CHECK-NEXT:    [[V_REPACK17:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 1
 ; CHECK-NEXT:    store i8 [[DOTUNPACK2]], ptr [[V_REPACK17]], align 1
-; CHECK-NEXT:    [[V_REPACK19:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 2
+; CHECK-NEXT:    [[V_REPACK19:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 2
 ; CHECK-NEXT:    store i8 [[DOTUNPACK4]], ptr [[V_REPACK19]], align 2
-; CHECK-NEXT:    [[V_REPACK21:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 3
+; CHECK-NEXT:    [[V_REPACK21:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 3
 ; CHECK-NEXT:    store i8 [[DOTUNPACK6]], ptr [[V_REPACK21]], align 1
-; CHECK-NEXT:    [[V_REPACK23:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 4
+; CHECK-NEXT:    [[V_REPACK23:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 4
 ; CHECK-NEXT:    store i8 [[DOTUNPACK8]], ptr [[V_REPACK23]], align 4
-; CHECK-NEXT:    [[V_REPACK25:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 5
+; CHECK-NEXT:    [[V_REPACK25:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 5
 ; CHECK-NEXT:    store i8 [[DOTUNPACK10]], ptr [[V_REPACK25]], align 1
-; CHECK-NEXT:    [[V_REPACK27:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 6
+; CHECK-NEXT:    [[V_REPACK27:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 6
 ; CHECK-NEXT:    store i8 [[DOTUNPACK12]], ptr [[V_REPACK27]], align 2
-; CHECK-NEXT:    [[V_REPACK29:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 7
+; CHECK-NEXT:    [[V_REPACK29:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 7
 ; CHECK-NEXT:    store i8 [[DOTUNPACK14]], ptr [[V_REPACK29]], align 1
-; CHECK-NEXT:    [[V_REPACK31:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 8
+; CHECK-NEXT:    [[V_REPACK31:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
 ; CHECK-NEXT:    store i64 [[DOTUNPACK16]], ptr [[V_REPACK31]], align 8
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
index 4ac1d084cda9a5..c8e5c239af5284 100644
--- a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
@@ -3,7 +3,7 @@
 
 define <4 x ptr> @PR41270(ptr %x) {
 ; CHECK-LABEL: @PR41270(
-; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i64 6
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds nuw i8, ptr [[X:%.*]], i64 6
 ; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x ptr> poison, ptr [[T3]], i64 0
 ; CHECK-NEXT:    ret <4 x ptr> [[INS2]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
index 8b442133335423..3732af6f861415 100644
--- a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
+++ b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
@@ -3,7 +3,7 @@
 
 define <4 x ptr> @PR41270(ptr %x) {
 ; CHECK-LABEL: @PR41270(
-; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i64 6
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds nuw i8, ptr [[X:%.*]], i64 6
 ; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x ptr> <ptr poison, ptr undef, ptr undef, ptr undef>, ptr [[T3]], i64 0
 ; CHECK-NEXT:    ret <4 x ptr> [[INS2]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/vscale_gep.ll b/llvm/test/Transforms/InstCombine/vscale_gep.ll
index f424650d21e840..f85b8f9c5c546c 100644
--- a/llvm/test/Transforms/InstCombine/vscale_gep.ll
+++ b/llvm/test/Transforms/InstCombine/vscale_gep.ll
@@ -44,7 +44,7 @@ define void @gep_bitcast(ptr %p) {
 define i32 @gep_alloca_inbounds_vscale_zero() {
 ; CHECK-LABEL: @gep_alloca_inbounds_vscale_zero(
 ; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
-; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 8
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[TMP]], align 4
 ; CHECK-NEXT:    ret i32 [[LOAD]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/wcslen-1.ll b/llvm/test/Transforms/InstCombine/wcslen-1.ll
index 8833754a536784..68ab3470f67685 100644
--- a/llvm/test/Transforms/InstCombine/wcslen-1.ll
+++ b/llvm/test/Transforms/InstCombine/wcslen-1.ll
@@ -176,7 +176,7 @@ define i64 @test_no_simplify3(i32 %x) {
 ; CHECK-LABEL: @test_no_simplify3(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 15
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[AND]] to i64
-; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds [13 x i32], ptr @null_hello_mid, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds nuw [13 x i32], ptr @null_hello_mid, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
 ; CHECK-NEXT:    ret i64 [[HELLO_L]]
 ;
@@ -190,8 +190,8 @@ define i64 @test_no_simplify3_no_null_opt(i32 %x) #0 {
 ; CHECK-LABEL: @test_no_simplify3_no_null_opt(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 15
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[AND]] to i64
-; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds [13 x i32], ptr @null_hello_mid, i64 0, i64 [[TMP1]]
-; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr [[HELLO_P]])
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds nuw [13 x i32], ptr @null_hello_mid, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
 ; CHECK-NEXT:    ret i64 [[HELLO_L]]
 ;
   %and = and i32 %x, 15

diff  --git a/llvm/test/Transforms/InstCombine/wcslen-3.ll b/llvm/test/Transforms/InstCombine/wcslen-3.ll
index 6cabe0570cfe49..39516de0a0800d 100644
--- a/llvm/test/Transforms/InstCombine/wcslen-3.ll
+++ b/llvm/test/Transforms/InstCombine/wcslen-3.ll
@@ -165,7 +165,7 @@ define i64 @test_no_simplify3(i16 %x) {
 ; CHECK-LABEL: @test_no_simplify3(
 ; CHECK-NEXT:    [[AND:%.*]] = and i16 [[X:%.*]], 15
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 [[AND]] to i64
-; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds [13 x i16], ptr @null_hello_mid, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds nuw [13 x i16], ptr @null_hello_mid, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
 ; CHECK-NEXT:    ret i64 [[HELLO_L]]
 ;

diff  --git a/llvm/test/Transforms/InstCombine/wcslen-5.ll b/llvm/test/Transforms/InstCombine/wcslen-5.ll
index 3e0cdf34a6422a..33c6075a602a35 100644
--- a/llvm/test/Transforms/InstCombine/wcslen-5.ll
+++ b/llvm/test/Transforms/InstCombine/wcslen-5.ll
@@ -42,7 +42,7 @@ define dso_local i64 @fold_wcslen_s3_pi_p1_s5(i1 zeroext %0, i64 %1) {
 ; XFAIL-CHECK-NEXT:    ret i64 [[SEL]]
 ; CHECK-LABEL: @fold_wcslen_s3_pi_p1_s5(
 ; CHECK-NEXT:    [[PS3_PI:%.*]] = getelementptr inbounds [4 x i32], ptr @ws3, i64 0, i64 [[TMP1:%.*]]
-; CHECK-NEXT:    [[PS3_PI_P1:%.*]] = getelementptr inbounds i8, ptr [[PS3_PI]], i64 4
+; CHECK-NEXT:    [[PS3_PI_P1:%.*]] = getelementptr inbounds nuw i8, ptr [[PS3_PI]], i64 4
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[TMP0:%.*]], ptr [[PS3_PI_P1]], ptr @ws5
 ; CHECK-NEXT:    [[LEN:%.*]] = tail call i64 @wcslen(ptr nonnull [[SEL]])
 ; CHECK-NEXT:    ret i64 [[LEN]]

diff  --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
index cba80a3a2bc5fb..b8215d9b6f33cb 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
@@ -19,50 +19,50 @@ define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg
 ; CHECK-A55:       for.body6:
 ; CHECK-A55-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY6]] ]
 ; CHECK-A55-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
-; CHECK-A55-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[ARG_2:%.*]], i64 [[INDVARS_IV]]
+; CHECK-A55-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-A55-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; CHECK-A55-NEXT:    [[CONV:%.*]] = sext i16 [[TMP1]] to i32
-; CHECK-A55-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[ARG_3:%.*]], i64 [[INDVARS_IV]]
+; CHECK-A55-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-A55-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
 ; CHECK-A55-NEXT:    [[CONV15:%.*]] = sext i16 [[TMP2]] to i32
 ; CHECK-A55-NEXT:    [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]]
-; CHECK-A55-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[ARG_1:%.*]], i64 [[INDVARS_IV]]
+; CHECK-A55-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-A55-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
 ; CHECK-A55-NEXT:    [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP3]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21]], ptr [[ARRAYIDX20]], align 4
 ; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
-; CHECK-A55-NEXT:    [[ARRAYIDX10_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-A55-NEXT:    [[ARRAYIDX10_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-A55-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX10_1]], align 2
 ; CHECK-A55-NEXT:    [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32
-; CHECK-A55-NEXT:    [[ARRAYIDX14_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-A55-NEXT:    [[ARRAYIDX14_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-A55-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX14_1]], align 2
 ; CHECK-A55-NEXT:    [[CONV15_1:%.*]] = sext i16 [[TMP5]] to i32
 ; CHECK-A55-NEXT:    [[MUL16_1:%.*]] = mul nsw i32 [[CONV15_1]], [[CONV_1]]
-; CHECK-A55-NEXT:    [[ARRAYIDX20_1:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-A55-NEXT:    [[ARRAYIDX20_1:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-A55-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX20_1]], align 4
 ; CHECK-A55-NEXT:    [[ADD21_1:%.*]] = add nsw i32 [[MUL16_1]], [[TMP6]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_1]], ptr [[ARRAYIDX20_1]], align 4
 ; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
-; CHECK-A55-NEXT:    [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-A55-NEXT:    [[ARRAYIDX10_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-A55-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX10_2]], align 2
 ; CHECK-A55-NEXT:    [[CONV_2:%.*]] = sext i16 [[TMP7]] to i32
-; CHECK-A55-NEXT:    [[ARRAYIDX14_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-A55-NEXT:    [[ARRAYIDX14_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-A55-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX14_2]], align 2
 ; CHECK-A55-NEXT:    [[CONV15_2:%.*]] = sext i16 [[TMP8]] to i32
 ; CHECK-A55-NEXT:    [[MUL16_2:%.*]] = mul nsw i32 [[CONV15_2]], [[CONV_2]]
-; CHECK-A55-NEXT:    [[ARRAYIDX20_2:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-A55-NEXT:    [[ARRAYIDX20_2:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-A55-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX20_2]], align 4
 ; CHECK-A55-NEXT:    [[ADD21_2:%.*]] = add nsw i32 [[MUL16_2]], [[TMP9]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_2]], ptr [[ARRAYIDX20_2]], align 4
 ; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
-; CHECK-A55-NEXT:    [[ARRAYIDX10_3:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-A55-NEXT:    [[ARRAYIDX10_3:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-A55-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10_3]], align 2
 ; CHECK-A55-NEXT:    [[CONV_3:%.*]] = sext i16 [[TMP10]] to i32
-; CHECK-A55-NEXT:    [[ARRAYIDX14_3:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-A55-NEXT:    [[ARRAYIDX14_3:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-A55-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX14_3]], align 2
 ; CHECK-A55-NEXT:    [[CONV15_3:%.*]] = sext i16 [[TMP11]] to i32
 ; CHECK-A55-NEXT:    [[MUL16_3:%.*]] = mul nsw i32 [[CONV15_3]], [[CONV_3]]
-; CHECK-A55-NEXT:    [[ARRAYIDX20_3:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-A55-NEXT:    [[ARRAYIDX20_3:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-A55-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX20_3]], align 4
 ; CHECK-A55-NEXT:    [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[TMP12]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_3]], ptr [[ARRAYIDX20_3]], align 4
@@ -75,14 +75,14 @@ define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg
 ; CHECK-A55-NEXT:    [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
 ; CHECK-A55-NEXT:    br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL:%.*]]
 ; CHECK-A55:       for.body6.epil:
-; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[INDVARS_IV_UNR]]
+; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_UNR]]
 ; CHECK-A55-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL]], align 2
 ; CHECK-A55-NEXT:    [[CONV_EPIL:%.*]] = sext i16 [[TMP13]] to i32
-; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[INDVARS_IV_UNR]]
+; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_UNR]]
 ; CHECK-A55-NEXT:    [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL]], align 2
 ; CHECK-A55-NEXT:    [[CONV15_EPIL:%.*]] = sext i16 [[TMP14]] to i32
 ; CHECK-A55-NEXT:    [[MUL16_EPIL:%.*]] = mul nsw i32 [[CONV15_EPIL]], [[CONV_EPIL]]
-; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[INDVARS_IV_UNR]]
+; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_UNR]]
 ; CHECK-A55-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL]], align 4
 ; CHECK-A55-NEXT:    [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP15]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL]], ptr [[ARRAYIDX20_EPIL]], align 4
@@ -90,14 +90,14 @@ define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg
 ; CHECK-A55-NEXT:    br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_1:%.*]]
 ; CHECK-A55:       for.body6.epil.1:
 ; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 1
-; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_EPIL]]
+; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_EPIL]]
 ; CHECK-A55-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL_1]], align 2
 ; CHECK-A55-NEXT:    [[CONV_EPIL_1:%.*]] = sext i16 [[TMP16]] to i32
-; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL_1:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_EPIL]]
+; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL_1:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_EPIL]]
 ; CHECK-A55-NEXT:    [[TMP17:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL_1]], align 2
 ; CHECK-A55-NEXT:    [[CONV15_EPIL_1:%.*]] = sext i16 [[TMP17]] to i32
 ; CHECK-A55-NEXT:    [[MUL16_EPIL_1:%.*]] = mul nsw i32 [[CONV15_EPIL_1]], [[CONV_EPIL_1]]
-; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_EPIL]]
+; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL_1:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_EPIL]]
 ; CHECK-A55-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL_1]], align 4
 ; CHECK-A55-NEXT:    [[ADD21_EPIL_1:%.*]] = add nsw i32 [[MUL16_EPIL_1]], [[TMP18]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL_1]], ptr [[ARRAYIDX20_EPIL_1]], align 4
@@ -105,14 +105,14 @@ define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg
 ; CHECK-A55-NEXT:    br i1 [[EPIL_ITER_CMP_1_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL_2:%.*]]
 ; CHECK-A55:       for.body6.epil.2:
 ; CHECK-A55-NEXT:    [[INDVARS_IV_NEXT_EPIL_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_UNR]], 2
-; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
+; CHECK-A55-NEXT:    [[ARRAYIDX10_EPIL_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
 ; CHECK-A55-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX10_EPIL_2]], align 2
 ; CHECK-A55-NEXT:    [[CONV_EPIL_2:%.*]] = sext i16 [[TMP19]] to i32
-; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL_2:%.*]] = getelementptr inbounds i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
+; CHECK-A55-NEXT:    [[ARRAYIDX14_EPIL_2:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
 ; CHECK-A55-NEXT:    [[TMP20:%.*]] = load i16, ptr [[ARRAYIDX14_EPIL_2]], align 2
 ; CHECK-A55-NEXT:    [[CONV15_EPIL_2:%.*]] = sext i16 [[TMP20]] to i32
 ; CHECK-A55-NEXT:    [[MUL16_EPIL_2:%.*]] = mul nsw i32 [[CONV15_EPIL_2]], [[CONV_EPIL_2]]
-; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
+; CHECK-A55-NEXT:    [[ARRAYIDX20_EPIL_2:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1]], i64 [[INDVARS_IV_NEXT_EPIL_1]]
 ; CHECK-A55-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20_EPIL_2]], align 4
 ; CHECK-A55-NEXT:    [[ADD21_EPIL_2:%.*]] = add nsw i32 [[MUL16_EPIL_2]], [[TMP21]]
 ; CHECK-A55-NEXT:    store i32 [[ADD21_EPIL_2]], ptr [[ARRAYIDX20_EPIL_2]], align 4
@@ -129,14 +129,14 @@ define void @runtime_unroll_generic(i32 %arg_0, ptr %arg_1, ptr %arg_2, ptr %arg
 ; CHECK-GENERIC-NEXT:    br label [[FOR_BODY6:%.*]]
 ; CHECK-GENERIC:       for.body6:
 ; CHECK-GENERIC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY6_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY6]] ]
-; CHECK-GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[ARG_2:%.*]], i64 [[INDVARS_IV]]
+; CHECK-GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_2:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-GENERIC-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; CHECK-GENERIC-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
-; CHECK-GENERIC-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, ptr [[ARG_3:%.*]], i64 [[INDVARS_IV]]
+; CHECK-GENERIC-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG_3:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-GENERIC-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX14]], align 2
 ; CHECK-GENERIC-NEXT:    [[CONV15:%.*]] = sext i16 [[TMP1]] to i32
 ; CHECK-GENERIC-NEXT:    [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]]
-; CHECK-GENERIC-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[ARG_1:%.*]], i64 [[INDVARS_IV]]
+; CHECK-GENERIC-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds nuw i32, ptr [[ARG_1:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-GENERIC-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4
 ; CHECK-GENERIC-NEXT:    [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP2]]
 ; CHECK-GENERIC-NEXT:    store i32 [[ADD21]], ptr [[ARRAYIDX20]], align 4

diff  --git a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
index 141af3e66949a3..302cc96c9961e0 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
@@ -17,7 +17,7 @@ define void @test(ptr %x, i32 %n) {
 ; CHECK-NEXT:    store i32 0, ptr [[X]], align 4
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[X]], i32 4
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i32 4
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[REM]], 1
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END]], label [[WHILE_BODY_1:%.*]]
 ; CHECK:       while.body.1:
@@ -28,7 +28,7 @@ define void @test(ptr %x, i32 %n) {
 ; CHECK-NEXT:    store i32 0, ptr [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    br label [[IF_END_1]]
 ; CHECK:       if.end.1:
-; CHECK-NEXT:    [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[X]], i32 8
+; CHECK-NEXT:    [[INCDEC_PTR_1:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i32 8
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[REM]], 2
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[WHILE_BODY_2:%.*]], label [[WHILE_END]]
 ; CHECK:       while.body.2:

diff  --git a/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
index b59f05bd11f0d8..ea499e548934f5 100644
--- a/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
+++ b/llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll
@@ -8,41 +8,41 @@ define hidden void @compile_time_full(ptr nocapture %a, ptr nocapture readonly %
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[I:%.*]] = load i8, ptr [[B:%.*]], align 1
 ; CHECK-NEXT:    store i8 [[I]], ptr [[A:%.*]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 1
 ; CHECK-NEXT:    [[I_1:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 1
+; CHECK-NEXT:    [[ARRAYIDX1_1:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 1
 ; CHECK-NEXT:    store i8 [[I_1]], ptr [[ARRAYIDX1_1]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 2
 ; CHECK-NEXT:    [[I_2:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 2
+; CHECK-NEXT:    [[ARRAYIDX1_2:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 2
 ; CHECK-NEXT:    store i8 [[I_2]], ptr [[ARRAYIDX1_2]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 3
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 3
 ; CHECK-NEXT:    [[I_3:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_3:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 3
+; CHECK-NEXT:    [[ARRAYIDX1_3:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 3
 ; CHECK-NEXT:    store i8 [[I_3]], ptr [[ARRAYIDX1_3]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 4
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 4
 ; CHECK-NEXT:    [[I_4:%.*]] = load i8, ptr [[ARRAYIDX_4]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_4:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 4
+; CHECK-NEXT:    [[ARRAYIDX1_4:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 4
 ; CHECK-NEXT:    store i8 [[I_4]], ptr [[ARRAYIDX1_4]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 5
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 5
 ; CHECK-NEXT:    [[I_5:%.*]] = load i8, ptr [[ARRAYIDX_5]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_5:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 5
+; CHECK-NEXT:    [[ARRAYIDX1_5:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 5
 ; CHECK-NEXT:    store i8 [[I_5]], ptr [[ARRAYIDX1_5]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 6
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 6
 ; CHECK-NEXT:    [[I_6:%.*]] = load i8, ptr [[ARRAYIDX_6]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_6:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 6
+; CHECK-NEXT:    [[ARRAYIDX1_6:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 6
 ; CHECK-NEXT:    store i8 [[I_6]], ptr [[ARRAYIDX1_6]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 7
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 7
 ; CHECK-NEXT:    [[I_7:%.*]] = load i8, ptr [[ARRAYIDX_7]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_7:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 7
+; CHECK-NEXT:    [[ARRAYIDX1_7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 7
 ; CHECK-NEXT:    store i8 [[I_7]], ptr [[ARRAYIDX1_7]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 8
+; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 8
 ; CHECK-NEXT:    [[I_8:%.*]] = load i8, ptr [[ARRAYIDX_8]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_8:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
+; CHECK-NEXT:    [[ARRAYIDX1_8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 8
 ; CHECK-NEXT:    store i8 [[I_8]], ptr [[ARRAYIDX1_8]], align 1
-; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 9
+; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i32 9
 ; CHECK-NEXT:    [[I_9:%.*]] = load i8, ptr [[ARRAYIDX_9]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1_9:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 9
+; CHECK-NEXT:    [[ARRAYIDX1_9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i32 9
 ; CHECK-NEXT:    store i8 [[I_9]], ptr [[ARRAYIDX1_9]], align 1
 ; CHECK-NEXT:    ret void
 ;
@@ -71,28 +71,28 @@ define hidden void @compile_time_partial(ptr nocapture %a, ptr nocapture readonl
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[I_07]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[B:%.*]], i32 [[I_07]]
 ; CHECK-NEXT:    [[I:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[I]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i32 [[I_07]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i16, ptr [[A:%.*]], i32 [[I_07]]
 ; CHECK-NEXT:    store i16 [[ADD]], ptr [[ARRAYIDX2]], align 2
 ; CHECK-NEXT:    [[INC:%.*]] = or disjoint i32 [[I_07]], 1
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INC]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i16, ptr [[B]], i32 [[INC]]
 ; CHECK-NEXT:    [[I_1:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 2
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add i16 [[I_1]], 1
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INC]]
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i32 [[INC]]
 ; CHECK-NEXT:    store i16 [[ADD_1]], ptr [[ARRAYIDX2_1]], align 2
 ; CHECK-NEXT:    [[INC_1:%.*]] = or disjoint i32 [[I_07]], 2
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INC_1]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i16, ptr [[B]], i32 [[INC_1]]
 ; CHECK-NEXT:    [[I_2:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 2
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add i16 [[I_2]], 1
-; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INC_1]]
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i32 [[INC_1]]
 ; CHECK-NEXT:    store i16 [[ADD_2]], ptr [[ARRAYIDX2_2]], align 2
 ; CHECK-NEXT:    [[INC_2:%.*]] = or disjoint i32 [[I_07]], 3
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[INC_2]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i16, ptr [[B]], i32 [[INC_2]]
 ; CHECK-NEXT:    [[I_3:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 2
 ; CHECK-NEXT:    [[ADD_3:%.*]] = add i16 [[I_3]], 1
-; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[INC_2]]
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i32 [[INC_2]]
 ; CHECK-NEXT:    store i16 [[ADD_3]], ptr [[ARRAYIDX2_3]], align 2
 ; CHECK-NEXT:    [[INC_3]] = add nuw nsw i32 [[I_07]], 4
 ; CHECK-NEXT:    [[EXITCOND_NOT_3:%.*]] = icmp eq i32 [[INC_3]], 1000
@@ -203,7 +203,7 @@ define hidden void @dont_unroll_call(ptr nocapture %a, ptr nocapture readonly %b
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I1]], [[I]]
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_013]]
 ; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 [[I_013]], i32 [[MUL]])
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 [[I_013]], i32 [[MUL]])
 ; CHECK-NEXT:    [[INC]] = add nuw i32 [[I_013]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
@@ -238,9 +238,9 @@ define hidden void @dont_unroll_optsize(ptr nocapture %a, ptr nocapture readonly
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_06:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_06]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i32 [[I_06]]
 ; CHECK-NEXT:    [[I:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_06]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i32 [[I_06]]
 ; CHECK-NEXT:    store i8 [[I]], ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_06]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 10
@@ -271,9 +271,9 @@ define hidden void @dont_unroll_minsize(ptr nocapture %a, ptr nocapture readonly
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_06:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_06]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i32 [[I_06]]
 ; CHECK-NEXT:    [[I:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_06]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i32 [[I_06]]
 ; CHECK-NEXT:    store i8 [[I]], ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_06]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 10

diff  --git a/llvm/test/Transforms/LoopUnroll/peel-loop.ll b/llvm/test/Transforms/LoopUnroll/peel-loop.ll
index a76facae3cb9d9..82ac7a90a82d8b 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop.ll
@@ -16,13 +16,13 @@ define void @basic(ptr %p, i32 %k) #0 {
 ; CHECK-NEXT:    [[CMP_PEEL_NOT:%.*]] = icmp eq i32 [[K]], 1
 ; CHECK-NEXT:    br i1 [[CMP_PEEL_NOT]], label [[FOR_END]], label [[FOR_BODY_PEEL2:%.*]]
 ; CHECK:       for.body.peel2:
-; CHECK-NEXT:    [[INCDEC_PTR_PEEL:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4
+; CHECK-NEXT:    [[INCDEC_PTR_PEEL:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
 ; CHECK-NEXT:    store i32 1, ptr [[INCDEC_PTR_PEEL]], align 4
 ; CHECK-NEXT:    [[CMP_PEEL5:%.*]] = icmp sgt i32 [[K]], 2
 ; CHECK-NEXT:    br i1 [[CMP_PEEL5]], label [[FOR_BODY_PEEL7:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.peel7:
-; CHECK-NEXT:    [[INCDEC_PTR_PEEL3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-; CHECK-NEXT:    [[INCDEC_PTR_PEEL8:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
+; CHECK-NEXT:    [[INCDEC_PTR_PEEL3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
+; CHECK-NEXT:    [[INCDEC_PTR_PEEL8:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
 ; CHECK-NEXT:    store i32 2, ptr [[INCDEC_PTR_PEEL3]], align 4
 ; CHECK-NEXT:    [[CMP_PEEL10_NOT:%.*]] = icmp eq i32 [[K]], 3
 ; CHECK-NEXT:    br i1 [[CMP_PEEL10_NOT]], label [[FOR_END]], label [[FOR_BODY:%.*]]
@@ -76,13 +76,13 @@ define i32 @output(ptr %p, i32 %k) #0 {
 ; CHECK-NEXT:    [[CMP_PEEL_NOT:%.*]] = icmp eq i32 [[K]], 1
 ; CHECK-NEXT:    br i1 [[CMP_PEEL_NOT]], label [[FOR_END]], label [[FOR_BODY_PEEL2:%.*]]
 ; CHECK:       for.body.peel2:
-; CHECK-NEXT:    [[INCDEC_PTR_PEEL:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4
+; CHECK-NEXT:    [[INCDEC_PTR_PEEL:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
 ; CHECK-NEXT:    store i32 1, ptr [[INCDEC_PTR_PEEL]], align 4
 ; CHECK-NEXT:    [[CMP_PEEL5:%.*]] = icmp sgt i32 [[K]], 2
 ; CHECK-NEXT:    br i1 [[CMP_PEEL5]], label [[FOR_BODY_PEEL7:%.*]], label [[FOR_END]]
 ; CHECK:       for.body.peel7:
-; CHECK-NEXT:    [[INCDEC_PTR_PEEL3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8
-; CHECK-NEXT:    [[INCDEC_PTR_PEEL8:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 12
+; CHECK-NEXT:    [[INCDEC_PTR_PEEL3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
+; CHECK-NEXT:    [[INCDEC_PTR_PEEL8:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 12
 ; CHECK-NEXT:    store i32 2, ptr [[INCDEC_PTR_PEEL3]], align 4
 ; CHECK-NEXT:    [[CMP_PEEL10_NOT:%.*]] = icmp eq i32 [[K]], 3
 ; CHECK-NEXT:    br i1 [[CMP_PEEL10_NOT]], label [[FOR_END]], label [[FOR_BODY:%.*]]

diff  --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
index 4307be7a8b5a73..a3cfeac1f86bec 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
@@ -66,30 +66,30 @@ define i32 @unroll(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %N)
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[C_010:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_3]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[MUL]], [[C_010]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX2_1]], align 4
 ; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]]
 ; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[ADD]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2_2]], align 4
 ; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]]
 ; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[MUL_2]], [[ADD_1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX2_3]], align 4
 ; CHECK-NEXT:    [[MUL_3:%.*]] = mul nsw i32 [[TMP14]], [[TMP13]]
 ; CHECK-NEXT:    [[ADD_3]] = add nsw i32 [[MUL_3]], [[ADD_2]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index 8cda055a53ad8f..6d43c1e1c7d48b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -91,17 +91,17 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP27]] to i32
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
 ; CHECK-NEXT:    [[CONV3:%.*]] = zext i8 [[TMP28]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV3]], [[CONV]]
 ; CHECK-NEXT:    [[SHR_26:%.*]] = lshr i32 [[MUL]], 8
 ; CHECK-NEXT:    [[CONV4:%.*]] = trunc nuw i32 [[SHR_26]] to i8
 ; CHECK-NEXT:    store i8 [[CONV4]], ptr [[ARRAYIDX2]], align 1
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1
 ; CHECK-NEXT:    [[CONV9:%.*]] = zext i8 [[TMP29]] to i32
 ; CHECK-NEXT:    [[MUL10:%.*]] = mul nuw nsw i32 [[CONV9]], [[CONV]]
@@ -172,7 +172,7 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -250,7 +250,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -324,7 +324,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
@@ -332,7 +332,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul <16 x i16> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <16 x i16> [[TMP2]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 32
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i64 32
 ; CHECK-NEXT:    store <16 x i16> [[TMP7]], ptr [[TMP9]], align 2
 ; CHECK-NEXT:    store <16 x i16> [[TMP8]], ptr [[TMP10]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -462,7 +462,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP1]], align 4
 ; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
index a5c38177c2dc3e..ab5600a2dc3a64 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -170,7 +170,7 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 168
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 168
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[TMP5]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -201,7 +201,7 @@ define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonl
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP12]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 168
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 168
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
index e437da29b94037..751c121df05269 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -315,7 +315,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP12]], i64 [[DOTIDX]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
 ; CHECK-NEXT:    store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 632687bf5a4130..1e310ca5fcaedd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -480,7 +480,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl nsw <vscale x 4 x i32> [[TMP5]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP7]]
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -498,7 +498,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[LOAD]], 1
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[LSHR]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[LSHR]]
 ; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV]], 1022
@@ -566,7 +566,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = shl nsw <vscale x 4 x i32> [[TMP13]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP15:%.*]] = and i64 [[INDEX]], 9223372036854775804
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP15]]
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -584,7 +584,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[LOAD]], 1
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[LSHR]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[LSHR]]
 ; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
index f56fae37b9ef04..1a281fe7c6f7fa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll
@@ -79,19 +79,19 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; SCALAR_TAIL_FOLDING:       if.then:
 ; SCALAR_TAIL_FOLDING-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP18:%.*]] = zext nneg i32 [[MUL]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP18]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP18]]
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[ADD:%.*]] = or disjoint i32 [[MUL]], 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP20:%.*]] = zext nneg i32 [[ADD]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP20]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP20]]
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP21:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP19]], i8 [[TMP21]])
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP22:%.*]] = zext nneg i32 [[MUL]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP22]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP22]]
 ; SCALAR_TAIL_FOLDING-NEXT:    store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]]
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP23:%.*]] = zext nneg i32 [[ADD]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP23]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP23]]
 ; SCALAR_TAIL_FOLDING-NEXT:    store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    br label [[FOR_INC]]
 ; SCALAR_TAIL_FOLDING:       for.inc:
@@ -253,14 +253,14 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT:    [[IX_012:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
 ; SCALAR_TAIL_FOLDING-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IX_012]], 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP15:%.*]] = zext nneg i32 [[MUL]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP15]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP15]]
 ; SCALAR_TAIL_FOLDING-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[CMP1:%.*]] = icmp samesign ugt i32 [[IX_012]], [[CONV]]
 ; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; SCALAR_TAIL_FOLDING:       if.then:
 ; SCALAR_TAIL_FOLDING-NEXT:    [[ADD:%.*]] = or disjoint i32 [[MUL]], 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP16:%.*]] = zext nneg i32 [[ADD]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP16]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP16]]
 ; SCALAR_TAIL_FOLDING-NEXT:    store i8 2, ptr [[ARRAYIDX3]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    br label [[FOR_INC]]
 ; SCALAR_TAIL_FOLDING:       for.inc:
@@ -417,7 +417,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; SCALAR_TAIL_FOLDING:       if.then:
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP16:%.*]] = zext nneg i32 [[MUL]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP16]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP16]]
 ; SCALAR_TAIL_FOLDING-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    br label [[IF_END]]
 ; SCALAR_TAIL_FOLDING:       if.end:
@@ -426,7 +426,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no
 ; SCALAR_TAIL_FOLDING:       if.then6:
 ; SCALAR_TAIL_FOLDING-NEXT:    [[ADD:%.*]] = or disjoint i32 [[MUL]], 1
 ; SCALAR_TAIL_FOLDING-NEXT:    [[TMP17:%.*]] = zext nneg i32 [[ADD]] to i64
-; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP17]]
+; SCALAR_TAIL_FOLDING-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP17]]
 ; SCALAR_TAIL_FOLDING-NEXT:    store i8 2, ptr [[ARRAYIDX7]], align 1
 ; SCALAR_TAIL_FOLDING-NEXT:    br label [[FOR_INC]]
 ; SCALAR_TAIL_FOLDING:       for.inc:

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index ade24b85f5af99..6d99da49d2f73b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -51,7 +51,7 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP16]], 4
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[DOTIDX]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[DOTIDX]]
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP13]], ptr [[TMP15]], align 4
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP14]], ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], splat (i32 1)
@@ -59,7 +59,7 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[DOTIDX5:%.*]] = shl nuw nsw i64 [[TMP21]], 4
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[DOTIDX5]]
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 [[DOTIDX5]]
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP19]], ptr [[TMP22]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll
index b8f94b938fa80e..84fc963833cf23 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll
@@ -73,7 +73,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-no-scalar-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-no-scalar-interleave.ll
index 716e3c4fbd7a2c..013c218da2e168 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-no-scalar-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-no-scalar-interleave.ll
@@ -14,10 +14,10 @@ define void @simple_histogram_forced_scalar_interleave(ptr noalias %buckets, ptr
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP1]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll
index 64746fa131d709..c430e72cea7038 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll
@@ -20,17 +20,17 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[GEP_INDICES:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[L_IDX:%.*]] = load i32, ptr [[GEP_INDICES]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[L_IDX]] to i64
-; CHECK-NEXT:    [[GEP_BUCKET:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[GEP_BUCKET:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[L_BUCKET:%.*]] = load i32, ptr [[GEP_BUCKET]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[L_BUCKET]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[GEP_BUCKET]], align 4
-; CHECK-NEXT:    [[IDX_ADDR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IV]]
+; CHECK-NEXT:    [[IDX_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[IV]]
 ; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
 ; CHECK-NEXT:    store i32 [[IV_TRUNC]], ptr [[IDX_ADDR]], align 4
-; CHECK-NEXT:    [[GEP_OTHER:%.*]] = getelementptr inbounds i32, ptr [[OTHER]], i64 [[IV]]
+; CHECK-NEXT:    [[GEP_OTHER:%.*]] = getelementptr inbounds nuw i32, ptr [[OTHER]], i64 [[IV]]
 ; CHECK-NEXT:    [[L_OTHER:%.*]] = load i32, ptr [[GEP_OTHER]], align 4
 ; CHECK-NEXT:    [[ADD_OTHER:%.*]] = add i32 [[L_OTHER]], [[IV_TRUNC]]
 ; CHECK-NEXT:    store i32 [[ADD_OTHER]], ptr [[GEP_OTHER]], align 4
@@ -106,7 +106,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe
 ; NORMAL_DEP_LIMIT-NEXT:    [[GEP_INDICES1:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
 ; NORMAL_DEP_LIMIT-NEXT:    [[L_IDX:%.*]] = load i32, ptr [[GEP_INDICES1]], align 4
 ; NORMAL_DEP_LIMIT-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[L_IDX]] to i64
-; NORMAL_DEP_LIMIT-NEXT:    [[GEP_BUCKET:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; NORMAL_DEP_LIMIT-NEXT:    [[GEP_BUCKET:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; NORMAL_DEP_LIMIT-NEXT:    [[L_BUCKET:%.*]] = load i32, ptr [[GEP_BUCKET]], align 4
 ; NORMAL_DEP_LIMIT-NEXT:    [[INC:%.*]] = add nsw i32 [[L_BUCKET]], 1
 ; NORMAL_DEP_LIMIT-NEXT:    store i32 [[INC]], ptr [[GEP_BUCKET]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
index 511d4dacc340c7..3b00312959d8aa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll
@@ -63,7 +63,7 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
@@ -129,7 +129,7 @@ define void @simple_histogram_inc_param(ptr noalias %buckets, ptr readonly %indi
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], [[INCVAL]]
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
@@ -269,7 +269,7 @@ define void @conditional_histogram(ptr noalias %buckets, ptr readonly %indices,
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP15]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX3]], align 4
@@ -346,7 +346,7 @@ define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N)
 ; CHECK-NEXT:    [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP_INDICES]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i8 [[TMP1]], 1
 ; CHECK-NEXT:    store i8 [[INC]], ptr [[ARRAYIDX2]], align 4
@@ -384,10 +384,10 @@ define void @histogram_float(ptr noalias %buckets, ptr readonly %indices, i64 %N
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = fadd fast float [[TMP1]], 1.000000e+00
 ; CHECK-NEXT:    store float [[INC]], ptr [[ARRAYIDX2]], align 4
@@ -425,12 +425,12 @@ define void @histogram_varying_increment(ptr noalias %buckets, ptr readonly %ind
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[INCIDX:%.*]] = getelementptr inbounds i32, ptr [[INCVALS]], i64 [[IV]]
+; CHECK-NEXT:    [[INCIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INCVALS]], i64 [[IV]]
 ; CHECK-NEXT:    [[INCVAL:%.*]] = load i32, ptr [[INCIDX]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP1]], [[INCVAL]]
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
@@ -483,7 +483,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP15]], 4
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[DOTIDX]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[DOTIDX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
@@ -506,7 +506,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
@@ -786,7 +786,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr %
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP17]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP18]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
@@ -831,10 +831,10 @@ define void @simple_histogram_unsafe_alias(ptr %buckets, ptr %indices, i64 %N) #
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
index d0decbff1a4625..ce8492cd77362f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
@@ -189,7 +189,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
 ; CHECK-NEXT:    [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
 ; CHECK-NEXT:    [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]]
-; CHECK-NEXT:    [[GEPDST:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[GEPDST:%.*]] = getelementptr inbounds nuw double, ptr [[DST]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store double [[CALL]], ptr [[GEPDST]], align 8
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]

diff  --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
index 17da8f314b0e74..ab7bb667f3f369 100644
--- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
+++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
@@ -14,7 +14,7 @@ define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) {
 ; GFX9-NEXT:    [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; GFX9-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; GFX9-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]]
-; GFX9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 4
+; GFX9-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP0]], i64 4
 ; GFX9-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2
 ; GFX9-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP1]], align 2
 ; GFX9-NEXT:    [[TMP2]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
@@ -44,7 +44,7 @@ define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) {
 ; VI-NEXT:    [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; VI-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; VI-NEXT:    [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]]
-; VI-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 4
+; VI-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP0]], i64 4
 ; VI-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2
 ; VI-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP1]], align 2
 ; VI-NEXT:    [[TMP2]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]]
@@ -70,7 +70,7 @@ define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) {
 ; CI:       for.body:
 ; CI-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CI-NEXT:    [[Q_04:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CI-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDVARS_IV]]
+; CI-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw half, ptr addrspace(1) [[S:%.*]], i64 [[INDVARS_IV]]
 ; CI-NEXT:    [[TMP0:%.*]] = load half, ptr addrspace(1) [[ARRAYIDX]], align 2
 ; CI-NEXT:    [[ADD]] = fadd fast half [[Q_04]], [[TMP0]]
 ; CI-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
index baad7a84a891a6..c459dd28fdaee3 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
@@ -13,7 +13,7 @@ define i64 @add_i64_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[X:%.*]], i32 [[I_08]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[X:%.*]], i32 [[I_08]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[TMP0]], [[R_07]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
@@ -76,7 +76,7 @@ define i64 @add_i32_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[X]], i32 [[I_08]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP5]] to i64
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[R_07]], [[CONV]]
@@ -141,7 +141,7 @@ define i64 @add_i16_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i32 [[I_08]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP5]] to i64
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[R_07]], [[CONV]]
@@ -206,7 +206,7 @@ define i64 @add_i8_i64(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i32 [[I_08]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i32 [[I_08]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP5]] to i64
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i64 [[R_07]], [[CONV]]
@@ -519,9 +519,9 @@ define i64 @mla_i64_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i3
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[X:%.*]], i32 [[I_010]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[X:%.*]], i32 [[I_010]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[Y:%.*]], i32 [[I_010]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i64, ptr [[Y:%.*]], i32 [[I_010]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    [[ADD]] = add nsw i64 [[MUL]], [[R_09]]
@@ -591,9 +591,9 @@ define i64 @mla_i32_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i3
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_010]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[X]], i32 [[I_010]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[I_010]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i32, ptr [[Y]], i32 [[I_010]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[MUL]] to i64
@@ -667,10 +667,10 @@ define i64 @mla_i16_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i3
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_012]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i32 [[I_012]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP9]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[Y]], i32 [[I_012]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[Y]], i32 [[I_012]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
 ; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP10]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
@@ -747,10 +747,10 @@ define i64 @mla_i8_i64(ptr nocapture readonly %x, ptr nocapture readonly %y, i32
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[X]], i32 [[I_012]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i32 [[I_012]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP9]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[Y]], i32 [[I_012]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[Y]], i32 [[I_012]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP10]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV2]], [[CONV]]
@@ -1482,7 +1482,7 @@ define i64 @mla_xx_sext_zext(ptr nocapture noundef readonly %x, i32 %n) #0 {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[S_010:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_011]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i32 [[I_011]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP7]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV]]
@@ -1558,7 +1558,7 @@ define i64 @mla_and_add_together_16_64(ptr nocapture noundef readonly %x, i32 no
 ; CHECK-NEXT:    [[I_019:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[T_018:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[S_017:%.*]] = phi i32 [ [[ADD6]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_019]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i32 [[I_019]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP9]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV]], [[CONV]]
@@ -1606,20 +1606,20 @@ define i64 @interleave_doublereduct_i16_i64(ptr %x, ptr %y, i32 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_025:%.*]] = phi i32 [ [[ADD13:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[T_024:%.*]] = phi i64 [ [[ADD12]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[I_025]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i16, ptr [[X:%.*]], i32 [[I_025]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i32 [[I_025]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i16, ptr [[Y:%.*]], i32 [[I_025]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
 ; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP1]] to i32
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
 ; CHECK-NEXT:    [[CONV3:%.*]] = sext i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[T_024]], [[CONV3]]
 ; CHECK-NEXT:    [[ADD4:%.*]] = or disjoint i32 [[I_025]], 1
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[ADD4]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i16, ptr [[X]], i32 [[ADD4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX5]], align 2
 ; CHECK-NEXT:    [[CONV6:%.*]] = sext i16 [[TMP2]] to i32
-; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, ptr [[Y]], i32 [[ADD4]]
+; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i16, ptr [[Y]], i32 [[ADD4]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX8]], align 2
 ; CHECK-NEXT:    [[CONV9:%.*]] = sext i16 [[TMP3]] to i32
 ; CHECK-NEXT:    [[MUL10:%.*]] = mul nsw i32 [[CONV9]], [[CONV6]]
@@ -1677,7 +1677,7 @@ define i64 @test_std_q31(ptr %x, i32 %n) #0 {
 ; CHECK-NEXT:    [[S_014:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY1]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[I_013:%.*]] = phi i32 [ [[ADD4:%.*]], [[FOR_BODY1]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[T_012:%.*]] = phi i64 [ [[ADD3]], [[FOR_BODY1]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[I_013]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[X:%.*]], i32 [[I_013]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[TMP0]], 8
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[SHR]] to i64

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 43366314f52477..12eb3bd03348a4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -39,9 +39,9 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 8.000000e+00)
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 1.200000e+01)
 ; AUTO_VEC-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; AUTO_VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
-; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 64
-; AUTO_VEC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 96
+; AUTO_VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
+; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 64
+; AUTO_VEC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 96
 ; AUTO_VEC-NEXT:    store <8 x float> [[VEC_IND]], ptr [[TMP1]], align 4
 ; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4
@@ -86,7 +86,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
 ; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ITER_CHECK]] ], [ [[IND_END8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -147,35 +147,35 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[X_06:%.*]] = phi float [ 1.000000e+00, [[FOR_BODY_PREHEADER_NEW]] ], [ [[CONV1_7:%.*]], [[FOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1:%.*]] = fadd float [[X_06]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
-; AUTO_VEC-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
 ; AUTO_VEC-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX_1]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_1:%.*]] = fadd float [[CONV1]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
-; AUTO_VEC-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
 ; AUTO_VEC-NEXT:    store float [[CONV1_1]], ptr [[ARRAYIDX_2]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_2:%.*]] = fadd float [[CONV1_1]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
-; AUTO_VEC-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
 ; AUTO_VEC-NEXT:    store float [[CONV1_2]], ptr [[ARRAYIDX_3]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_3:%.*]] = fadd float [[CONV1_2]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
-; AUTO_VEC-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
 ; AUTO_VEC-NEXT:    store float [[CONV1_3]], ptr [[ARRAYIDX_4]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_4:%.*]] = fadd float [[CONV1_3]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
-; AUTO_VEC-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
 ; AUTO_VEC-NEXT:    store float [[CONV1_4]], ptr [[ARRAYIDX_5]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_5:%.*]] = fadd float [[CONV1_4]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
-; AUTO_VEC-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
 ; AUTO_VEC-NEXT:    store float [[CONV1_5]], ptr [[ARRAYIDX_6]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_6:%.*]] = fadd float [[CONV1_5]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
-; AUTO_VEC-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
 ; AUTO_VEC-NEXT:    store float [[CONV1_6]], ptr [[ARRAYIDX_7]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_7]] = fadd float [[CONV1_6]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
@@ -191,7 +191,7 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[X_06_UNR]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
 ; AUTO_VEC-NEXT:    [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ], [ 0, [[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
 ; AUTO_VEC-NEXT:    store float [[X_06_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4
 ; AUTO_VEC-NEXT:    [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01
 ; AUTO_VEC-NEXT:    [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
@@ -411,9 +411,9 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02)
 ; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02)
 ; AUTO_VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]]
-; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 32
-; AUTO_VEC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 64
-; AUTO_VEC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 96
+; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 32
+; AUTO_VEC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 64
+; AUTO_VEC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 96
 ; AUTO_VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
 ; AUTO_VEC-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4
 ; AUTO_VEC-NEXT:    [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
@@ -470,7 +470,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC:       for.body:
 ; AUTO_VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
 ; AUTO_VEC-NEXT:    [[X_012:%.*]] = phi float [ [[ADD3:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ITER_CHECK]] ], [ [[IND_END11]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IND_END9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
-; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[P]], i64 [[INDVARS_IV]]
 ; AUTO_VEC-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; AUTO_VEC-NEXT:    [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]]
 ; AUTO_VEC-NEXT:    store float [[ADD]], ptr [[ARRAYIDX]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
index 3338cc85772e43..203beff4213200 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
@@ -20,14 +20,14 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
 ; SSE-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
 ; SSE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; SSE-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; SSE-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4
-; SSE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; SSE-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; SSE-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
-; SSE-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]]
+; SSE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE-NEXT:    [[WIDE_VEC2:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4
+; SSE-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; SSE-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; SSE-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
+; SSE-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]]
 ; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16
 ; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4
 ; SSE-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
 ; SSE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -62,24 +62,24 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; AVX1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]]
 ; AVX1-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4
 ; AVX1-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; AVX1-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX1-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
-; AVX1-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; AVX1-NEXT:    [[STRIDED_VEC8:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX1-NEXT:    [[WIDE_VEC2:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4
-; AVX1-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; AVX1-NEXT:    [[STRIDED_VEC9:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX1-NEXT:    [[WIDE_VEC3:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4
-; AVX1-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <8 x i32> [[WIDE_VEC3]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; AVX1-NEXT:    [[STRIDED_VEC10:%.*]] = shufflevector <8 x i32> [[WIDE_VEC3]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX1-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
-; AVX1-NEXT:    [[TMP11:%.*]] = add nsw <4 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
-; AVX1-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
-; AVX1-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
+; AVX1-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX1-NEXT:    [[WIDE_VEC2:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
+; AVX1-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; AVX1-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX1-NEXT:    [[WIDE_VEC5:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4
+; AVX1-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <8 x i32> [[WIDE_VEC5]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; AVX1-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <8 x i32> [[WIDE_VEC5]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX1-NEXT:    [[WIDE_VEC8:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4
+; AVX1-NEXT:    [[STRIDED_VEC9:%.*]] = shufflevector <8 x i32> [[WIDE_VEC8]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; AVX1-NEXT:    [[STRIDED_VEC10:%.*]] = shufflevector <8 x i32> [[WIDE_VEC8]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; AVX1-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
+; AVX1-NEXT:    [[TMP11:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]]
+; AVX1-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC6]]
+; AVX1-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC9]]
 ; AVX1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; AVX1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 16
-; AVX1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 32
-; AVX1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 48
+; AVX1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 16
+; AVX1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 32
+; AVX1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 48
 ; AVX1-NEXT:    store <4 x i32> [[TMP10]], ptr [[TMP14]], align 4
 ; AVX1-NEXT:    store <4 x i32> [[TMP11]], ptr [[TMP15]], align 4
 ; AVX1-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP16]], align 4
@@ -116,24 +116,24 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]]
 ; AVX2-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4
 ; AVX2-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; AVX2-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; AVX2-NEXT:    [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
-; AVX2-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; AVX2-NEXT:    [[STRIDED_VEC8:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; AVX2-NEXT:    [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP8]], align 4
-; AVX2-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; AVX2-NEXT:    [[STRIDED_VEC9:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; AVX2-NEXT:    [[WIDE_VEC3:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4
-; AVX2-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i32> [[WIDE_VEC3]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; AVX2-NEXT:    [[STRIDED_VEC10:%.*]] = shufflevector <16 x i32> [[WIDE_VEC3]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; AVX2-NEXT:    [[TMP10:%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC]]
-; AVX2-NEXT:    [[TMP11:%.*]] = add nsw <8 x i32> [[STRIDED_VEC8]], [[STRIDED_VEC4]]
-; AVX2-NEXT:    [[TMP12:%.*]] = add nsw <8 x i32> [[STRIDED_VEC9]], [[STRIDED_VEC5]]
-; AVX2-NEXT:    [[TMP13:%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC6]]
+; AVX2-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; AVX2-NEXT:    [[WIDE_VEC2:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
+; AVX2-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; AVX2-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC2]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; AVX2-NEXT:    [[WIDE_VEC5:%.*]] = load <16 x i32>, ptr [[TMP8]], align 4
+; AVX2-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i32> [[WIDE_VEC5]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; AVX2-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i32> [[WIDE_VEC5]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; AVX2-NEXT:    [[WIDE_VEC8:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4
+; AVX2-NEXT:    [[STRIDED_VEC9:%.*]] = shufflevector <16 x i32> [[WIDE_VEC8]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; AVX2-NEXT:    [[STRIDED_VEC10:%.*]] = shufflevector <16 x i32> [[WIDE_VEC8]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; AVX2-NEXT:    [[TMP10:%.*]] = add nsw <8 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
+; AVX2-NEXT:    [[TMP11:%.*]] = add nsw <8 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]]
+; AVX2-NEXT:    [[TMP12:%.*]] = add nsw <8 x i32> [[STRIDED_VEC7]], [[STRIDED_VEC6]]
+; AVX2-NEXT:    [[TMP13:%.*]] = add nsw <8 x i32> [[STRIDED_VEC10]], [[STRIDED_VEC9]]
 ; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 32
-; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 64
-; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 96
+; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 32
+; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 64
+; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 96
 ; AVX2-NEXT:    store <8 x i32> [[TMP10]], ptr [[TMP14]], align 4
 ; AVX2-NEXT:    store <8 x i32> [[TMP11]], ptr [[TMP15]], align 4
 ; AVX2-NEXT:    store <8 x i32> [[TMP12]], ptr [[TMP16]], align 4
@@ -158,13 +158,13 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
 ; ATOM:       for.body:
 ; ATOM-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; ATOM-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
-; ATOM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; ATOM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP0]]
 ; ATOM-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; ATOM-NEXT:    [[TMP2:%.*]] = or disjoint i64 [[TMP0]], 1
-; ATOM-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; ATOM-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP2]]
 ; ATOM-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
 ; ATOM-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP3]], [[TMP1]]
-; ATOM-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; ATOM-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; ATOM-NEXT:    store i32 [[ADD4]], ptr [[ARRAYIDX6]], align 4
 ; ATOM-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; ATOM-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
index c9a47a60866f17..07f17ec8c2cfae 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll
@@ -26,20 +26,20 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775792
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x ptr> poison, ptr [[A]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT]], <16 x ptr> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <16 x ptr> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <16 x ptr> [[BROADCAST_SPLAT]], zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT4]], <16 x i32> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP1]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP3]], <16 x i32> poison), !alias.scope [[META3]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[BROADCAST_SPLAT]], i32 4, <16 x i1> [[TMP1]], <16 x i32> poison), !alias.scope [[META3]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x i32> [[PREDPHI]], i64 15
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -50,31 +50,31 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) {
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_VEC7:%.*]] = and i64 [[SMAX2]], 9223372036854775800
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT10]], <8 x ptr> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <8 x ptr> [[BROADCAST_SPLAT11]], zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT13:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT12]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT8]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x ptr> [[BROADCAST_SPLAT9]], zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT11]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX9]]
-; CHECK-NEXT:    store <8 x i32> [[BROADCAST_SPLAT13]], ptr [[TMP5]], align 4, !alias.scope [[META8:![0-9]+]], !noalias [[META11:![0-9]+]]
-; CHECK-NEXT:    [[INDEX_NEXT16]] = add nuw i64 [[INDEX9]], 8
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC7]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX10]]
+; CHECK-NEXT:    store <8 x i32> [[BROADCAST_SPLAT12]], ptr [[TMP6]], align 4, !alias.scope [[META8:![0-9]+]], !noalias [[META11:![0-9]+]]
+; CHECK-NEXT:    [[INDEX_NEXT15]] = add nuw i64 [[INDEX10]], 8
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC7]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER14:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[BROADCAST_SPLAT11]], i32 4, <8 x i1> [[TMP7]], <8 x i32> poison), !alias.scope [[META11]]
-; CHECK-NEXT:    [[PREDPHI15:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[WIDE_MASKED_GATHER14]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[PREDPHI15]], i64 7
-; CHECK-NEXT:    [[CMP_N8:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC7]]
-; CHECK-NEXT:    br i1 [[CMP_N8]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER13:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[BROADCAST_SPLAT9]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison), !alias.scope [[META11]]
+; CHECK-NEXT:    [[PREDPHI14:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[WIDE_MASKED_GATHER13]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[PREDPHI14]], i64 7
+; CHECK-NEXT:    [[CMP_N16:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC7]]
+; CHECK-NEXT:    br i1 [[CMP_N16]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC7]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[A]], null
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[LATCH]], label [[COND_LOAD:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
index 58c6827128feb2..19978271ac0ce4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
@@ -35,9 +35,9 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b)
 ; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 128
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 192
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 128
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 192
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP3]], align 8, !alias.scope [[META0]]
@@ -88,7 +88,7 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b)
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
 ; CHECK-NEXT:    [[T0:%.*]] = phi i32 [ [[T3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX19]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[T2:%.*]] = load i32, ptr [[T1]], align 8
 ; CHECK-NEXT:    [[T3]] = add i32 [[T0]], [[T2]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4
@@ -214,7 +214,7 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[T2:%.*]] = load i32, ptr [[T1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T2]], [[K]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[T1]], align 4
@@ -339,13 +339,13 @@ define void @variant_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
-; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[T2:%.*]] = load i32, ptr [[T1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T2]], [[K]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[T1]], align 4
 ; CHECK-NEXT:    br i1 [[CMP]], label [[COND_STORE:%.*]], label [[LATCH]]
 ; CHECK:       cond_store:
-; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[I]]
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i64 [[I]]
 ; CHECK-NEXT:    [[T4:%.*]] = load i32, ptr [[T3]], align 8
 ; CHECK-NEXT:    store i32 [[T4]], ptr [[A]], align 4
 ; CHECK-NEXT:    br label [[LATCH]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index dbe39964eb2291..7c467c264843a3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -21,82 +21,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; O1-NEXT:  entry:
 ; O1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O1-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O1-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O1-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O1-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O1-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O1-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O1-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O1-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O1-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O1-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O1-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O1-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O1-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O1-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O1-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O1-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O1-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O1-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O1-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O1-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O1-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O1-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O1-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O1-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O1-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O1-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O1-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O1-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -106,82 +106,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; O2-NEXT:  entry:
 ; O2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -191,82 +191,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; O3-NEXT:  entry:
 ; O3-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O3-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O3-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O3-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O3-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O3-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O3-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O3-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O3-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O3-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O3-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O3-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O3-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O3-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O3-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O3-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O3-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O3-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O3-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O3-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O3-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O3-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O3-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O3-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O3-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O3-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O3-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -276,82 +276,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; O3DEFAULT-NEXT:  entry:
 ; O3DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O3DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O3DEFAULT-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O3DEFAULT-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O3DEFAULT-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O3DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O3DEFAULT-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O3DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O3DEFAULT-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O3DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O3DEFAULT-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O3DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O3DEFAULT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O3DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O3DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O3DEFAULT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O3DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O3DEFAULT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O3DEFAULT-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O3DEFAULT-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O3DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O3DEFAULT-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O3DEFAULT-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O3DEFAULT-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O3DEFAULT-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O3DEFAULT-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O3DEFAULT-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O3DEFAULT-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O3DEFAULT-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O3DEFAULT-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O3DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O3DEFAULT-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O3DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O3DEFAULT-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O3DEFAULT-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O3DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O3DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O3DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O3DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O3DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O3DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O3DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O3DEFAULT-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O3DEFAULT-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O3DEFAULT-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O3DEFAULT-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O3DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O3DEFAULT-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O3DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O3DEFAULT-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O3DEFAULT-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O3DEFAULT-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O3DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O3DEFAULT-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O3DEFAULT-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O3DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O3DEFAULT-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O3DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O3DEFAULT-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O3DEFAULT-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O3DEFAULT-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O3DEFAULT-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O3DEFAULT-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -361,82 +361,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; Os-NEXT:  entry:
 ; Os-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; Os-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; Os-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; Os-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; Os-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; Os-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; Os-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; Os-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; Os-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; Os-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; Os-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; Os-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; Os-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; Os-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; Os-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; Os-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; Os-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; Os-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; Os-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; Os-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; Os-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; Os-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; Os-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; Os-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; Os-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; Os-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; Os-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; Os-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; Os-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; Os-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; Os-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; Os-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; Os-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; Os-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; Os-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; Os-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; Os-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; Os-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; Os-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; Os-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; Os-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; Os-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; Os-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; Os-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; Os-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; Os-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; Os-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; Os-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; Os-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; Os-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; Os-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; Os-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; Os-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; Os-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; Os-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; Os-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; Os-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; Os-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; Os-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; Os-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; Os-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; Os-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; Os-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; Os-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; Os-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; Os-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; Os-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; Os-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; Os-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; Os-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; Os-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; Os-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; Os-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; Os-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; Os-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; Os-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; Os-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; Os-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; Os-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -446,82 +446,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; Oz-NEXT:  entry:
 ; Oz-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; Oz-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; Oz-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; Oz-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; Oz-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; Oz-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; Oz-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; Oz-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; Oz-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; Oz-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; Oz-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; Oz-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; Oz-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; Oz-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; Oz-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; Oz-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; Oz-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; Oz-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; Oz-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; Oz-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; Oz-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; Oz-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; Oz-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; Oz-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; Oz-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; Oz-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; Oz-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; Oz-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; Oz-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; Oz-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; Oz-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; Oz-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; Oz-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; Oz-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; Oz-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; Oz-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; Oz-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; Oz-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; Oz-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; Oz-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; Oz-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; Oz-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; Oz-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; Oz-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; Oz-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; Oz-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; Oz-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; Oz-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; Oz-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; Oz-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; Oz-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; Oz-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; Oz-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; Oz-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; Oz-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; Oz-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; Oz-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; Oz-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; Oz-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; Oz-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; Oz-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; Oz-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; Oz-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; Oz-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; Oz-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; Oz-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; Oz-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; Oz-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; Oz-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; Oz-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; Oz-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; Oz-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; Oz-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; Oz-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; Oz-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; Oz-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; Oz-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; Oz-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; Oz-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; Oz-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; Oz-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; Oz-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; Oz-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -531,82 +531,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; O1VEC2-NEXT:  entry:
 ; O1VEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O1VEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O1VEC2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O1VEC2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O1VEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O1VEC2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O1VEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O1VEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O1VEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O1VEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O1VEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O1VEC2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O1VEC2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O1VEC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O1VEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O1VEC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O1VEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O1VEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O1VEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O1VEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O1VEC2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O1VEC2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O1VEC2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O1VEC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O1VEC2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O1VEC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O1VEC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O1VEC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O1VEC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O1VEC2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O1VEC2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O1VEC2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O1VEC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O1VEC2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O1VEC2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O1VEC2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O1VEC2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O1VEC2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O1VEC2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O1VEC2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O1VEC2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O1VEC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O1VEC2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O1VEC2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O1VEC2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O1VEC2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O1VEC2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O1VEC2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O1VEC2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O1VEC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O1VEC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O1VEC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O1VEC2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O1VEC2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O1VEC2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O1VEC2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O1VEC2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O1VEC2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O1VEC2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O1VEC2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O1VEC2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O1VEC2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O1VEC2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O1VEC2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O1VEC2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O1VEC2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O1VEC2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O1VEC2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O1VEC2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O1VEC2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O1VEC2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O1VEC2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O1VEC2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O1VEC2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -616,82 +616,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; OzVEC2-NEXT:  entry:
 ; OzVEC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; OzVEC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; OzVEC2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; OzVEC2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; OzVEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; OzVEC2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; OzVEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; OzVEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; OzVEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; OzVEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; OzVEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; OzVEC2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; OzVEC2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; OzVEC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; OzVEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; OzVEC2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; OzVEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; OzVEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; OzVEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; OzVEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; OzVEC2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; OzVEC2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; OzVEC2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; OzVEC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; OzVEC2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; OzVEC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; OzVEC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; OzVEC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; OzVEC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; OzVEC2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; OzVEC2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; OzVEC2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; OzVEC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; OzVEC2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; OzVEC2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; OzVEC2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; OzVEC2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; OzVEC2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; OzVEC2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; OzVEC2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; OzVEC2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; OzVEC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; OzVEC2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; OzVEC2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; OzVEC2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; OzVEC2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; OzVEC2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; OzVEC2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; OzVEC2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; OzVEC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; OzVEC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; OzVEC2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; OzVEC2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; OzVEC2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; OzVEC2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; OzVEC2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; OzVEC2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; OzVEC2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; OzVEC2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; OzVEC2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; OzVEC2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; OzVEC2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; OzVEC2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; OzVEC2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; OzVEC2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; OzVEC2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; OzVEC2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; OzVEC2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; OzVEC2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; OzVEC2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; OzVEC2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; OzVEC2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; OzVEC2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; OzVEC2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -701,82 +701,82 @@ define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; O3DIS-NEXT:  entry:
 ; O3DIS-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O3DIS-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O3DIS-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O3DIS-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O3DIS-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O3DIS-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O3DIS-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O3DIS-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O3DIS-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O3DIS-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O3DIS-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O3DIS-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O3DIS-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O3DIS-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O3DIS-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O3DIS-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O3DIS-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O3DIS-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O3DIS-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O3DIS-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O3DIS-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O3DIS-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O3DIS-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O3DIS-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O3DIS-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O3DIS-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O3DIS-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O3DIS-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O3DIS-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O3DIS-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O3DIS-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O3DIS-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O3DIS-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O3DIS-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O3DIS-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O3DIS-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O3DIS-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O3DIS-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O3DIS-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O3DIS-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O3DIS-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O3DIS-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O3DIS-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O3DIS-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O3DIS-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O3DIS-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O3DIS-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O3DIS-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O3DIS-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O3DIS-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O3DIS-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O3DIS-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O3DIS-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O3DIS-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O3DIS-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O3DIS-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O3DIS-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O3DIS-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O3DIS-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O3DIS-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O3DIS-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O3DIS-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O3DIS-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O3DIS-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O3DIS-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O3DIS-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O3DIS-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O3DIS-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O3DIS-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O3DIS-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O3DIS-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O3DIS-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O3DIS-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O3DIS-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O3DIS-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O3DIS-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O3DIS-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O3DIS-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O3DIS-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O3DIS-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O3DIS-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O3DIS-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O3DIS-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -807,10 +807,10 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O1-NEXT:    br label [[FOR_BODY:%.*]]
 ; O1:       for.body:
 ; O1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; O1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; O1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; O1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; O1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; O1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; O1-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
@@ -823,82 +823,82 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O2-NEXT:  entry:
 ; O2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O2-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O2-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O2-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O2-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O2-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O2-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O2-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O2-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O2-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O2-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O2-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O2-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O2-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O2-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O2-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O2-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O2-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O2-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O2-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O2-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O2-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O2-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O2-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O2-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -908,82 +908,82 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O3-NEXT:  entry:
 ; O3-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O3-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O3-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O3-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O3-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O3-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O3-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O3-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O3-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O3-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O3-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O3-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O3-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O3-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O3-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O3-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O3-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O3-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O3-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O3-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O3-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O3-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O3-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O3-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O3-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O3-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O3-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O3-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O3-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O3-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O3-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O3-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O3-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O3-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O3-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O3-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O3-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O3-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O3-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O3-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O3-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O3-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O3-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O3-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O3-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O3-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -993,82 +993,82 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O3DEFAULT-NEXT:  entry:
 ; O3DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; O3DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; O3DEFAULT-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; O3DEFAULT-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; O3DEFAULT-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; O3DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; O3DEFAULT-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O3DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; O3DEFAULT-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O3DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; O3DEFAULT-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; O3DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O3DEFAULT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; O3DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; O3DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O3DEFAULT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; O3DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O3DEFAULT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; O3DEFAULT-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; O3DEFAULT-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O3DEFAULT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; O3DEFAULT-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; O3DEFAULT-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O3DEFAULT-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; O3DEFAULT-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O3DEFAULT-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; O3DEFAULT-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; O3DEFAULT-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O3DEFAULT-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; O3DEFAULT-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; O3DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O3DEFAULT-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; O3DEFAULT-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O3DEFAULT-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; O3DEFAULT-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; O3DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O3DEFAULT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; O3DEFAULT-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; O3DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O3DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; O3DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O3DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; O3DEFAULT-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; O3DEFAULT-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O3DEFAULT-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; O3DEFAULT-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; O3DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; O3DEFAULT-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; O3DEFAULT-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; O3DEFAULT-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; O3DEFAULT-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; O3DEFAULT-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; O3DEFAULT-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; O3DEFAULT-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; O3DEFAULT-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; O3DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; O3DEFAULT-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; O3DEFAULT-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; O3DEFAULT-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; O3DEFAULT-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; O3DEFAULT-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; O3DEFAULT-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; O3DEFAULT-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; O3DEFAULT-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; O3DEFAULT-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; O3DEFAULT-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -1078,82 +1078,82 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; Os-NEXT:  entry:
 ; Os-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
 ; Os-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; Os-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 16
+; Os-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
 ; Os-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; Os-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 16
+; Os-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
 ; Os-NEXT:    store <4 x i32> [[TMP1]], ptr [[A]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
-; Os-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; Os-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
+; Os-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; Os-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
 ; Os-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; Os-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
-; Os-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; Os-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
+; Os-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; Os-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
-; Os-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; Os-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
+; Os-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; Os-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
 ; Os-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; Os-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
-; Os-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; Os-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
+; Os-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; Os-NEXT:    store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; Os-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; Os-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
+; Os-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; Os-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
 ; Os-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
 ; Os-NEXT:    [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
-; Os-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; Os-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
+; Os-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; Os-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; Os-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; Os-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
+; Os-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; Os-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
 ; Os-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
 ; Os-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
-; Os-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; Os-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
+; Os-NEXT:    [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; Os-NEXT:    store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
-; Os-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; Os-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
+; Os-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; Os-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
 ; Os-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
 ; Os-NEXT:    [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
-; Os-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; Os-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
+; Os-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; Os-NEXT:    store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
-; Os-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 192
-; Os-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 208
+; Os-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
+; Os-NEXT:    [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
 ; Os-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
 ; Os-NEXT:    [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 192
-; Os-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 208
+; Os-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
+; Os-NEXT:    [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
 ; Os-NEXT:    store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
-; Os-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 224
-; Os-NEXT:    [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 240
+; Os-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
+; Os-NEXT:    [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
 ; Os-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
 ; Os-NEXT:    [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
 ; Os-NEXT:    [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
 ; Os-NEXT:    [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
-; Os-NEXT:    [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 224
-; Os-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 240
+; Os-NEXT:    [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
+; Os-NEXT:    [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
 ; Os-NEXT:    store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
 ; Os-NEXT:    store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
 ; Os-NEXT:    [[TMP46:%.*]] = load i32, ptr [[A]], align 4
@@ -1164,10 +1164,10 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; Oz-NEXT:    br label [[FOR_BODY:%.*]]
 ; Oz:       for.body:
 ; Oz-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; Oz-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; Oz-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; Oz-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; Oz-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; Oz-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; Oz-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; Oz-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; Oz-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; Oz-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
@@ -1186,21 +1186,21 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O1VEC2:       vector.body:
 ; O1VEC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; O1VEC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; O1VEC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
-; O1VEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
-; O1VEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
-; O1VEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
-; O1VEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
-; O1VEC2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; O1VEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; O1VEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
-; O1VEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4
-; O1VEC2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP10]], align 4
-; O1VEC2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP11]], align 4
+; O1VEC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; O1VEC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; O1VEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; O1VEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; O1VEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; O1VEC2-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; O1VEC2-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
+; O1VEC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; O1VEC2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
+; O1VEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4
+; O1VEC2-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP7]], align 4
+; O1VEC2-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
 ; O1VEC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; O1VEC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; O1VEC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; O1VEC2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; O1VEC2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; O1VEC2:       middle.block:
 ; O1VEC2-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; O1VEC2:       scalar.ph:
@@ -1208,17 +1208,17 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O1VEC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; O1VEC2:       for.body:
 ; O1VEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O1VEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
-; O1VEC2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; O1VEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[N]]
-; O1VEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; O1VEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
+; O1VEC2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; O1VEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]]
+; O1VEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; O1VEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O1VEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O1VEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
 ; O1VEC2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; O1VEC2:       for.end:
-; O1VEC2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[A]], align 4
-; O1VEC2-NEXT:    ret i32 [[TMP14]]
+; O1VEC2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
+; O1VEC2-NEXT:    ret i32 [[TMP11]]
 ;
 ; OzVEC2-LABEL: @nopragma(
 ; OzVEC2-NEXT:  entry:
@@ -1230,21 +1230,21 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; OzVEC2:       vector.body:
 ; OzVEC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; OzVEC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; OzVEC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
-; OzVEC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
-; OzVEC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
-; OzVEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
-; OzVEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
-; OzVEC2-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
-; OzVEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; OzVEC2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
-; OzVEC2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4
-; OzVEC2-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP10]], align 4
-; OzVEC2-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP11]], align 4
+; OzVEC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
+; OzVEC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; OzVEC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; OzVEC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; OzVEC2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; OzVEC2-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; OzVEC2-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
+; OzVEC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; OzVEC2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
+; OzVEC2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4
+; OzVEC2-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP7]], align 4
+; OzVEC2-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
 ; OzVEC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; OzVEC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
-; OzVEC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; OzVEC2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; OzVEC2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; OzVEC2:       middle.block:
 ; OzVEC2-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; OzVEC2:       scalar.ph:
@@ -1252,27 +1252,27 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; OzVEC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; OzVEC2:       for.body:
 ; OzVEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; OzVEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
-; OzVEC2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; OzVEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[N]]
-; OzVEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; OzVEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
+; OzVEC2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; OzVEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]]
+; OzVEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; OzVEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; OzVEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; OzVEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
 ; OzVEC2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; OzVEC2:       for.end:
-; OzVEC2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[A]], align 4
-; OzVEC2-NEXT:    ret i32 [[TMP14]]
+; OzVEC2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[A]], align 4
+; OzVEC2-NEXT:    ret i32 [[TMP11]]
 ;
 ; O3DIS-LABEL: @nopragma(
 ; O3DIS-NEXT:  entry:
 ; O3DIS-NEXT:    br label [[FOR_BODY:%.*]]
 ; O3DIS:       for.body:
 ; O3DIS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O3DIS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; O3DIS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; O3DIS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; O3DIS-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; O3DIS-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; O3DIS-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; O3DIS-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O3DIS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O3DIS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
@@ -1306,10 +1306,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O1-NEXT:    br label [[FOR_BODY:%.*]]
 ; O1:       for.body:
 ; O1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; O1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; O1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; O1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; O1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; O1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; O1-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
@@ -1323,10 +1323,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O2-NEXT:    br label [[FOR_BODY:%.*]]
 ; O2:       for.body:
 ; O2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; O2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; O2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; O2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; O2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; O2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; O2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
@@ -1340,10 +1340,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O3-NEXT:    br label [[FOR_BODY:%.*]]
 ; O3:       for.body:
 ; O3-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; O3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; O3-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; O3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; O3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; O3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; O3-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O3-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O3-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
@@ -1359,58 +1359,58 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O3DEFAULT-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; O3DEFAULT-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP3]], ptr [[A:%.*]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 16
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
+; O3DEFAULT-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 16
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 16
 ; O3DEFAULT-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_4]], align 4
 ; O3DEFAULT-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP5]], ptr [[ARRAYIDX2_4]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 32
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 32
+; O3DEFAULT-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
 ; O3DEFAULT-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_8]], align 4
 ; O3DEFAULT-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP7]], ptr [[ARRAYIDX2_8]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 48
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 48
+; O3DEFAULT-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_12:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
 ; O3DEFAULT-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_12]], align 4
 ; O3DEFAULT-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP9]], ptr [[ARRAYIDX2_12]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 64
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 64
+; O3DEFAULT-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_16:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
 ; O3DEFAULT-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_16]], align 4
 ; O3DEFAULT-NEXT:    [[TMP11:%.*]] = add nsw <4 x i32> [[TMP10]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP11]], ptr [[ARRAYIDX2_16]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 80
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 80
+; O3DEFAULT-NEXT:    [[ARRAYIDX_20:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
 ; O3DEFAULT-NEXT:    [[TMP12:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_20]], align 4
 ; O3DEFAULT-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP13]], ptr [[ARRAYIDX2_20]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 96
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_24:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 96
+; O3DEFAULT-NEXT:    [[ARRAYIDX_24:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_24:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
 ; O3DEFAULT-NEXT:    [[TMP14:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_24]], align 4
 ; O3DEFAULT-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP15]], ptr [[ARRAYIDX2_24]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 112
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 112
+; O3DEFAULT-NEXT:    [[ARRAYIDX_28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_28:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
 ; O3DEFAULT-NEXT:    [[TMP16:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_28]], align 4
 ; O3DEFAULT-NEXT:    [[TMP17:%.*]] = add nsw <4 x i32> [[TMP16]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP17]], ptr [[ARRAYIDX2_28]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 128
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_32:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 128
+; O3DEFAULT-NEXT:    [[ARRAYIDX_32:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
 ; O3DEFAULT-NEXT:    [[TMP18:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_32]], align 4
 ; O3DEFAULT-NEXT:    [[TMP19:%.*]] = add nsw <4 x i32> [[TMP18]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP19]], ptr [[ARRAYIDX2_32]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 144
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_36:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 144
+; O3DEFAULT-NEXT:    [[ARRAYIDX_36:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_36:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
 ; O3DEFAULT-NEXT:    [[TMP20:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_36]], align 4
 ; O3DEFAULT-NEXT:    [[TMP21:%.*]] = add nsw <4 x i32> [[TMP20]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP21]], ptr [[ARRAYIDX2_36]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 160
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_40:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 160
+; O3DEFAULT-NEXT:    [[ARRAYIDX_40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_40:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
 ; O3DEFAULT-NEXT:    [[TMP22:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_40]], align 4
 ; O3DEFAULT-NEXT:    [[TMP23:%.*]] = add nsw <4 x i32> [[TMP22]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP23]], ptr [[ARRAYIDX2_40]], align 4
-; O3DEFAULT-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 176
-; O3DEFAULT-NEXT:    [[ARRAYIDX2_44:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 176
+; O3DEFAULT-NEXT:    [[ARRAYIDX_44:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
+; O3DEFAULT-NEXT:    [[ARRAYIDX2_44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
 ; O3DEFAULT-NEXT:    [[TMP24:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_44]], align 4
 ; O3DEFAULT-NEXT:    [[TMP25:%.*]] = add nsw <4 x i32> [[TMP24]], [[TMP2]]
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP25]], ptr [[ARRAYIDX2_44]], align 4
@@ -1422,10 +1422,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; Os-NEXT:    br label [[FOR_BODY:%.*]]
 ; Os:       for.body:
 ; Os-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; Os-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; Os-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; Os-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; Os-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; Os-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; Os-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; Os-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; Os-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; Os-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
@@ -1439,10 +1439,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; Oz-NEXT:    br label [[FOR_BODY:%.*]]
 ; Oz:       for.body:
 ; Oz-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; Oz-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; Oz-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; Oz-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; Oz-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; Oz-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; Oz-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; Oz-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; Oz-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; Oz-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
@@ -1456,10 +1456,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O1VEC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; O1VEC2:       for.body:
 ; O1VEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O1VEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; O1VEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; O1VEC2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; O1VEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; O1VEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; O1VEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; O1VEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O1VEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O1VEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
@@ -1473,10 +1473,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; OzVEC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; OzVEC2:       for.body:
 ; OzVEC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; OzVEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; OzVEC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; OzVEC2-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; OzVEC2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; OzVEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; OzVEC2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; OzVEC2-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; OzVEC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; OzVEC2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
@@ -1490,10 +1490,10 @@ define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
 ; O3DIS-NEXT:    br label [[FOR_BODY:%.*]]
 ; O3DIS:       for.body:
 ; O3DIS-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; O3DIS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; O3DIS-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
 ; O3DIS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; O3DIS-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
-; O3DIS-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; O3DIS-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
 ; O3DIS-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; O3DIS-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; O3DIS-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
index 89febbccfd8d29..d00edeb1c9e6d3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -10,8 +10,8 @@ define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrs
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[ITER_CHECK:%.*]]
 ; CHECK:       iter.check:
-; CHECK-NEXT:    [[DOT10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0:%.*]], i64 16
-; CHECK-NEXT:    [[DOT12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 16
+; CHECK-NEXT:    [[DOT10:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP0:%.*]], i64 16
+; CHECK-NEXT:    [[DOT12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP1:%.*]], i64 16
 ; CHECK-NEXT:    [[UMAX2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP2:%.*]], i64 1)
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
@@ -33,17 +33,17 @@ define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrs
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT12]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP5]], i64 32
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP5]], i64 64
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP5]], i64 96
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP5]], i64 32
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP5]], i64 64
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP5]], i64 96
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP5]], align 8, !alias.scope [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP6]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP7]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP8]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT10]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP9]], i64 32
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP9]], i64 64
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP9]], i64 96
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP9]], i64 32
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP9]], i64 64
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP9]], i64 96
 ; CHECK-NEXT:    store <4 x ptr addrspace(1)> [[WIDE_LOAD]], ptr addrspace(1) [[TMP9]], align 8, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT:    store <4 x ptr addrspace(1)> [[WIDE_LOAD4]], ptr addrspace(1) [[TMP10]], align 8, !alias.scope [[META3]], !noalias [[META0]]
 ; CHECK-NEXT:    store <4 x ptr addrspace(1)> [[WIDE_LOAD5]], ptr addrspace(1) [[TMP11]], align 8, !alias.scope [[META3]], !noalias [[META0]]

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index d490055b4ff39a..f29428c51c636b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -45,7 +45,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -55,7 +55,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
@@ -65,7 +65,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -75,7 +75,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP21]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -85,7 +85,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP27]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -95,7 +95,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP33]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -105,7 +105,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP39]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -115,7 +115,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP45]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -157,9 +157,9 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; ENABLED_MASKED_STRIDED:       if.then:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IX_09]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[MUL]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[MUL]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[IX_09]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[IX_09]]
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP5]], ptr [[ARRAYIDX3]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[FOR_INC]]
 ; ENABLED_MASKED_STRIDED:       for.inc:
@@ -218,7 +218,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -228,7 +228,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
@@ -238,7 +238,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -248,7 +248,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP21]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -258,7 +258,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP27]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -268,7 +268,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP33]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -278,7 +278,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP39]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -288,7 +288,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP45]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -407,7 +407,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP5]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -417,7 +417,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP11]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -427,7 +427,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP17]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP17]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -437,7 +437,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP23]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP23]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -447,7 +447,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP29]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP29]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -457,7 +457,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP35]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP35]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -467,7 +467,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP41]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP41]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -477,7 +477,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP47]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP47]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
@@ -794,21 +794,21 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP0]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP5]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = extractelement <8 x i32> [[TMP0]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP7]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP7]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP0]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP0]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP11]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP0]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP13]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP13]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP0]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP2]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -840,7 +840,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read
 ; ENABLED_MASKED_STRIDED:       vector.body:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP0]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP0]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP1]], i32 1, <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]]
@@ -911,7 +911,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -921,7 +921,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
@@ -931,7 +931,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -941,7 +941,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP21]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -951,7 +951,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP27]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -961,7 +961,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP33]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -971,7 +971,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP39]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -981,7 +981,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP45]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -1014,7 +1014,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IV:%.*]] = or disjoint <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[TMP3]], <16 x i8> poison)
@@ -1093,7 +1093,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -1103,7 +1103,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
@@ -1113,7 +1113,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -1123,7 +1123,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP21]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -1133,7 +1133,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP27]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -1143,7 +1143,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP33]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -1153,7 +1153,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP39]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -1163,7 +1163,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP45]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -1174,7 +1174,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP52]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
@@ -1184,7 +1184,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP58]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
@@ -1194,7 +1194,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if19:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP64]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
@@ -1204,7 +1204,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if21:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP70]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
@@ -1214,7 +1214,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if23:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP76]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
@@ -1224,7 +1224,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if25:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP82]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
@@ -1234,7 +1234,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if27:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP88]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
@@ -1244,7 +1244,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if29:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP94]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
@@ -1255,7 +1255,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw i8, ptr [[Q:%.*]], i32 [[TMP101]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], ptr [[TMP102]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -1264,7 +1264,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if31:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP105]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], ptr [[TMP106]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
@@ -1273,7 +1273,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if33:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP109]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], ptr [[TMP110]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
@@ -1282,7 +1282,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if35:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP113]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], ptr [[TMP114]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
@@ -1291,7 +1291,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP117]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], ptr [[TMP118]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
@@ -1300,7 +1300,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP121]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], ptr [[TMP122]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
@@ -1309,7 +1309,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP125]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], ptr [[TMP126]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
@@ -1318,7 +1318,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP129]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], ptr [[TMP130]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
@@ -1328,7 +1328,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
@@ -1337,7 +1337,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
@@ -1346,7 +1346,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
@@ -1355,7 +1355,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
@@ -1364,7 +1364,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
@@ -1373,7 +1373,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
@@ -1382,7 +1382,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
@@ -1391,7 +1391,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], ptr [[TMP163]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
@@ -1498,7 +1498,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -1508,7 +1508,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
@@ -1518,7 +1518,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -1528,7 +1528,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP21]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -1538,7 +1538,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP27]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -1548,7 +1548,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP33]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -1558,7 +1558,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP39]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -1568,7 +1568,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP45]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -1579,7 +1579,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP52]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
@@ -1589,7 +1589,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP58]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
@@ -1599,7 +1599,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if19:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP64]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
@@ -1609,7 +1609,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if21:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP70]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
@@ -1619,7 +1619,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if23:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP76]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
@@ -1629,7 +1629,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if25:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP82]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
@@ -1639,7 +1639,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if27:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP88]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
@@ -1649,7 +1649,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if29:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP94]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
@@ -1660,7 +1660,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw i8, ptr [[Q:%.*]], i32 [[TMP101]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], ptr [[TMP102]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -1669,7 +1669,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if31:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP105]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], ptr [[TMP106]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
@@ -1678,7 +1678,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if33:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP109]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], ptr [[TMP110]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
@@ -1687,7 +1687,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if35:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP113]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], ptr [[TMP114]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
@@ -1696,7 +1696,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP117]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], ptr [[TMP118]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
@@ -1705,7 +1705,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP121]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], ptr [[TMP122]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
@@ -1714,7 +1714,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP125]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], ptr [[TMP126]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
@@ -1723,7 +1723,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP129]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], ptr [[TMP130]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
@@ -1733,7 +1733,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
@@ -1742,7 +1742,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
@@ -1751,7 +1751,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
@@ -1760,7 +1760,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
@@ -1769,7 +1769,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
@@ -1778,7 +1778,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
@@ -1787,7 +1787,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
@@ -1796,7 +1796,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], ptr [[TMP163]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
@@ -1823,7 +1823,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP3]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -1833,7 +1833,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if1:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
@@ -1843,7 +1843,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if3:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -1853,7 +1853,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if5:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP21]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -1863,7 +1863,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if7:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP27]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -1873,7 +1873,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if9:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP33]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -1883,7 +1883,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if11:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP39]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -1893,7 +1893,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if13:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP45]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -1904,7 +1904,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if15:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP52]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
@@ -1914,7 +1914,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if17:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP58]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
@@ -1924,7 +1924,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if19:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP64]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
@@ -1934,7 +1934,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if21:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP70]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
@@ -1944,7 +1944,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if23:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP76]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
@@ -1954,7 +1954,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if25:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP82]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
@@ -1964,7 +1964,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if27:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP88]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
@@ -1974,7 +1974,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if29:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP94]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
@@ -1985,7 +1985,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw i8, ptr [[Q:%.*]], i32 [[TMP101]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], ptr [[TMP102]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -1994,7 +1994,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if31:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP105]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], ptr [[TMP106]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
@@ -2003,7 +2003,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if33:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP109]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], ptr [[TMP110]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
@@ -2012,7 +2012,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if35:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP113]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], ptr [[TMP114]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
@@ -2021,7 +2021,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if37:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP117]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], ptr [[TMP118]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
@@ -2030,7 +2030,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if39:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP121]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], ptr [[TMP122]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
@@ -2039,7 +2039,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if41:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP125]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], ptr [[TMP126]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
@@ -2048,7 +2048,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if43:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP129]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], ptr [[TMP130]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
@@ -2058,7 +2058,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if45:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
@@ -2067,7 +2067,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if47:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
@@ -2076,7 +2076,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if49:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
@@ -2085,7 +2085,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if51:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
@@ -2094,7 +2094,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if53:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
@@ -2103,7 +2103,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if55:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
@@ -2112,7 +2112,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if57:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
@@ -2121,7 +2121,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if59:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
 ; ENABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], ptr [[TMP163]], align 1
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
@@ -2218,7 +2218,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP5]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP5]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -2228,7 +2228,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP11]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP11]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = insertelement <8 x i8> [[TMP9]], i8 [[TMP13]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -2238,7 +2238,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP17]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP17]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = insertelement <8 x i8> [[TMP15]], i8 [[TMP19]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -2248,7 +2248,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP23]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP23]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP26:%.*]] = insertelement <8 x i8> [[TMP21]], i8 [[TMP25]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -2258,7 +2258,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP29]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP29]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP32:%.*]] = insertelement <8 x i8> [[TMP27]], i8 [[TMP31]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -2268,7 +2268,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP35]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP35]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <8 x i8> [[TMP33]], i8 [[TMP37]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -2278,7 +2278,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP40]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP41]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP41]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP44:%.*]] = insertelement <8 x i8> [[TMP39]], i8 [[TMP43]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -2288,7 +2288,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP46]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP47]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP47]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = insertelement <8 x i8> [[TMP45]], i8 [[TMP49]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
@@ -2299,7 +2299,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP54]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP54]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP57:%.*]] = insertelement <8 x i8> poison, i8 [[TMP56]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
@@ -2309,7 +2309,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP59]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if19:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP60]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP60]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP62:%.*]] = load i8, ptr [[TMP61]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP63:%.*]] = insertelement <8 x i8> [[TMP58]], i8 [[TMP62]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
@@ -2319,7 +2319,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP65]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if21:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP66]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP66]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP69:%.*]] = insertelement <8 x i8> [[TMP64]], i8 [[TMP68]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
@@ -2329,7 +2329,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP71]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if23:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP72]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP72]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP74:%.*]] = load i8, ptr [[TMP73]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP75:%.*]] = insertelement <8 x i8> [[TMP70]], i8 [[TMP74]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
@@ -2339,7 +2339,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP77]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if25:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP78]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP78]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP80:%.*]] = load i8, ptr [[TMP79]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP81:%.*]] = insertelement <8 x i8> [[TMP76]], i8 [[TMP80]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
@@ -2349,7 +2349,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP83]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if27:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP84]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP84]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP86:%.*]] = load i8, ptr [[TMP85]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP87:%.*]] = insertelement <8 x i8> [[TMP82]], i8 [[TMP86]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
@@ -2359,7 +2359,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP89]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if29:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP90]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP90]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP92:%.*]] = load i8, ptr [[TMP91]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP93:%.*]] = insertelement <8 x i8> [[TMP88]], i8 [[TMP92]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
@@ -2369,7 +2369,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP95]], label [[PRED_LOAD_IF31:%.*]], label [[PRED_LOAD_CONTINUE32:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if31:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP96]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP96]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP98:%.*]] = load i8, ptr [[TMP97]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP99:%.*]] = insertelement <8 x i8> [[TMP94]], i8 [[TMP98]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE32]]
@@ -2380,7 +2380,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP102]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP103]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP104:%.*]] = getelementptr inbounds nuw i8, ptr [[Q:%.*]], i32 [[TMP103]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i8> [[TMP101]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP105]], ptr [[TMP104]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -2389,7 +2389,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP106]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if33:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP107]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP108:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP107]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i8> [[TMP101]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP109]], ptr [[TMP108]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
@@ -2398,7 +2398,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP110]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if35:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP111]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP112:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP111]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i8> [[TMP101]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP113]], ptr [[TMP112]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
@@ -2407,7 +2407,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP114]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP115]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP116:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP115]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i8> [[TMP101]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP117]], ptr [[TMP116]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
@@ -2416,7 +2416,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP118]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP119]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP120:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP119]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i8> [[TMP101]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP121]], ptr [[TMP120]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
@@ -2425,7 +2425,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP122]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP123]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP124:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP123]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i8> [[TMP101]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP125]], ptr [[TMP124]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
@@ -2434,7 +2434,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP126]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP127]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP128:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP127]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i8> [[TMP101]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP129]], ptr [[TMP128]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
@@ -2443,7 +2443,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP130]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP131]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP132:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP131]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP133:%.*]] = extractelement <8 x i8> [[TMP101]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP133]], ptr [[TMP132]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
@@ -2453,7 +2453,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP135]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i32> [[TMP52]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP136]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP137:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP136]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i8> [[TMP134]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP138]], ptr [[TMP137]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
@@ -2462,7 +2462,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP139]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i32> [[TMP52]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP140]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP141:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP140]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i8> [[TMP134]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP142]], ptr [[TMP141]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
@@ -2471,7 +2471,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP143]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i32> [[TMP52]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP144]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP145:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP144]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i8> [[TMP134]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP146]], ptr [[TMP145]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
@@ -2480,7 +2480,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP147]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i32> [[TMP52]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP148]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP149:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP148]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i8> [[TMP134]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP150]], ptr [[TMP149]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
@@ -2489,7 +2489,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP151]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i32> [[TMP52]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP152]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP153:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP152]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i8> [[TMP134]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP154]], ptr [[TMP153]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
@@ -2498,7 +2498,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP155]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i32> [[TMP52]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP156]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP157:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP156]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i8> [[TMP134]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP158]], ptr [[TMP157]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
@@ -2507,7 +2507,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP159]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i32> [[TMP52]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP160]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP161:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP160]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i8> [[TMP134]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP162]], ptr [[TMP161]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
@@ -2516,7 +2516,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP163]], label [[PRED_STORE_IF61:%.*]], label [[PRED_STORE_CONTINUE62]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if61:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i32> [[TMP52]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP164]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP164]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP166:%.*]] = extractelement <8 x i8> [[TMP134]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP166]], ptr [[TMP165]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE62]]
@@ -2652,7 +2652,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP3]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP3]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = insertelement <8 x i8> poison, i8 [[TMP5]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE]]
@@ -2662,7 +2662,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP9]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP9]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP11]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
@@ -2672,7 +2672,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP15]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP15]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <8 x i8> [[TMP13]], i8 [[TMP17]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
@@ -2682,7 +2682,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP21]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP21]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = insertelement <8 x i8> [[TMP19]], i8 [[TMP23]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
@@ -2692,7 +2692,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if7:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP27]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP27]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP30:%.*]] = insertelement <8 x i8> [[TMP25]], i8 [[TMP29]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
@@ -2702,7 +2702,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if9:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP33]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP33]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <8 x i8> [[TMP31]], i8 [[TMP35]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
@@ -2712,7 +2712,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if11:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP39:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP39]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP39]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP41:%.*]] = load i8, ptr [[TMP40]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP42:%.*]] = insertelement <8 x i8> [[TMP37]], i8 [[TMP41]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
@@ -2722,7 +2722,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if13:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP45]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP46:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP45]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP47:%.*]] = load i8, ptr [[TMP46]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP48:%.*]] = insertelement <8 x i8> [[TMP43]], i8 [[TMP47]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
@@ -2733,7 +2733,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP52]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP52]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP55:%.*]] = insertelement <8 x i8> poison, i8 [[TMP54]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
@@ -2743,7 +2743,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP57]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP58:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP58]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP59:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP58]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP60:%.*]] = load i8, ptr [[TMP59]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP61:%.*]] = insertelement <8 x i8> [[TMP56]], i8 [[TMP60]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE18]]
@@ -2753,7 +2753,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP63]], label [[PRED_LOAD_IF19:%.*]], label [[PRED_LOAD_CONTINUE20:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if19:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP64:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP64]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP65:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP64]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP66:%.*]] = load i8, ptr [[TMP65]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP67:%.*]] = insertelement <8 x i8> [[TMP62]], i8 [[TMP66]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE20]]
@@ -2763,7 +2763,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP69]], label [[PRED_LOAD_IF21:%.*]], label [[PRED_LOAD_CONTINUE22:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if21:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP70:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP70]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP71:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP70]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP73:%.*]] = insertelement <8 x i8> [[TMP68]], i8 [[TMP72]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE22]]
@@ -2773,7 +2773,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP75]], label [[PRED_LOAD_IF23:%.*]], label [[PRED_LOAD_CONTINUE24:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if23:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP76:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP76]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP77:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP76]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP78:%.*]] = load i8, ptr [[TMP77]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP79:%.*]] = insertelement <8 x i8> [[TMP74]], i8 [[TMP78]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE24]]
@@ -2783,7 +2783,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP81]], label [[PRED_LOAD_IF25:%.*]], label [[PRED_LOAD_CONTINUE26:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if25:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP82:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP82]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP83:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP82]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP84:%.*]] = load i8, ptr [[TMP83]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP85:%.*]] = insertelement <8 x i8> [[TMP80]], i8 [[TMP84]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE26]]
@@ -2793,7 +2793,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP87]], label [[PRED_LOAD_IF27:%.*]], label [[PRED_LOAD_CONTINUE28:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if27:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP88:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP88]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP89:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP88]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP91:%.*]] = insertelement <8 x i8> [[TMP86]], i8 [[TMP90]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE28]]
@@ -2803,7 +2803,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP93]], label [[PRED_LOAD_IF29:%.*]], label [[PRED_LOAD_CONTINUE30:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if29:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP94:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds i8, ptr [[P]], i32 [[TMP94]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP95:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i32 [[TMP94]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP96:%.*]] = load i8, ptr [[TMP95]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP97:%.*]] = insertelement <8 x i8> [[TMP92]], i8 [[TMP96]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE30]]
@@ -2814,7 +2814,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP100]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP101:%.*]] = extractelement <8 x i32> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP101]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP102:%.*]] = getelementptr inbounds nuw i8, ptr [[Q:%.*]], i32 [[TMP101]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP103:%.*]] = extractelement <8 x i8> [[TMP99]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP103]], ptr [[TMP102]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -2823,7 +2823,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP104]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if31:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP105:%.*]] = extractelement <8 x i32> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP105]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP106:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP105]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP107:%.*]] = extractelement <8 x i8> [[TMP99]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP107]], ptr [[TMP106]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE32]]
@@ -2832,7 +2832,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP108]], label [[PRED_STORE_IF33:%.*]], label [[PRED_STORE_CONTINUE34:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if33:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP109:%.*]] = extractelement <8 x i32> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP109]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP109]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP111:%.*]] = extractelement <8 x i8> [[TMP99]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP111]], ptr [[TMP110]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE34]]
@@ -2841,7 +2841,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP112]], label [[PRED_STORE_IF35:%.*]], label [[PRED_STORE_CONTINUE36:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if35:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP113:%.*]] = extractelement <8 x i32> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP113]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP114:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP113]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP115:%.*]] = extractelement <8 x i8> [[TMP99]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP115]], ptr [[TMP114]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE36]]
@@ -2850,7 +2850,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP116]], label [[PRED_STORE_IF37:%.*]], label [[PRED_STORE_CONTINUE38:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if37:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP117:%.*]] = extractelement <8 x i32> [[TMP1]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP117]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP118:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP117]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP119:%.*]] = extractelement <8 x i8> [[TMP99]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP119]], ptr [[TMP118]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE38]]
@@ -2859,7 +2859,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP120]], label [[PRED_STORE_IF39:%.*]], label [[PRED_STORE_CONTINUE40:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if39:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP121:%.*]] = extractelement <8 x i32> [[TMP1]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP121]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP122:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP121]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP123:%.*]] = extractelement <8 x i8> [[TMP99]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP123]], ptr [[TMP122]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE40]]
@@ -2868,7 +2868,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP124]], label [[PRED_STORE_IF41:%.*]], label [[PRED_STORE_CONTINUE42:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if41:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP125:%.*]] = extractelement <8 x i32> [[TMP1]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP125]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP126:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP125]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP127:%.*]] = extractelement <8 x i8> [[TMP99]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP127]], ptr [[TMP126]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE42]]
@@ -2877,7 +2877,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP128]], label [[PRED_STORE_IF43:%.*]], label [[PRED_STORE_CONTINUE44:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if43:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP129:%.*]] = extractelement <8 x i32> [[TMP1]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP129]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP130:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP129]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP131:%.*]] = extractelement <8 x i8> [[TMP99]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP131]], ptr [[TMP130]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE44]]
@@ -2887,7 +2887,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP133]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if45:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP134:%.*]] = extractelement <8 x i32> [[TMP50]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP134]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP135:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP134]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP136:%.*]] = extractelement <8 x i8> [[TMP132]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP136]], ptr [[TMP135]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE46]]
@@ -2896,7 +2896,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP137]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if47:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP138:%.*]] = extractelement <8 x i32> [[TMP50]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP138]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP139:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP138]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP140:%.*]] = extractelement <8 x i8> [[TMP132]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP140]], ptr [[TMP139]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE48]]
@@ -2905,7 +2905,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP141]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if49:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP142:%.*]] = extractelement <8 x i32> [[TMP50]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP142]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP143:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP142]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP144:%.*]] = extractelement <8 x i8> [[TMP132]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP144]], ptr [[TMP143]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE50]]
@@ -2914,7 +2914,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP145]], label [[PRED_STORE_IF51:%.*]], label [[PRED_STORE_CONTINUE52:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if51:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP146:%.*]] = extractelement <8 x i32> [[TMP50]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP146]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP147:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP146]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP148:%.*]] = extractelement <8 x i8> [[TMP132]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP148]], ptr [[TMP147]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE52]]
@@ -2923,7 +2923,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP149]], label [[PRED_STORE_IF53:%.*]], label [[PRED_STORE_CONTINUE54:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if53:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP150:%.*]] = extractelement <8 x i32> [[TMP50]], i64 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP150]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP151:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP150]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP152:%.*]] = extractelement <8 x i8> [[TMP132]], i64 4
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP152]], ptr [[TMP151]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE54]]
@@ -2932,7 +2932,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP153]], label [[PRED_STORE_IF55:%.*]], label [[PRED_STORE_CONTINUE56:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if55:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP154:%.*]] = extractelement <8 x i32> [[TMP50]], i64 5
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP154]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP155:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP154]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP156:%.*]] = extractelement <8 x i8> [[TMP132]], i64 5
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP156]], ptr [[TMP155]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE56]]
@@ -2941,7 +2941,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP157]], label [[PRED_STORE_IF57:%.*]], label [[PRED_STORE_CONTINUE58:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if57:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP158:%.*]] = extractelement <8 x i32> [[TMP50]], i64 6
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP158]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP159:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP158]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP160:%.*]] = extractelement <8 x i8> [[TMP132]], i64 6
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP160]], ptr [[TMP159]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE58]]
@@ -2950,7 +2950,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP161]], label [[PRED_STORE_IF59:%.*]], label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if59:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP162:%.*]] = extractelement <8 x i32> [[TMP50]], i64 7
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds i8, ptr [[Q]], i32 [[TMP162]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP163:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i32 [[TMP162]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP164:%.*]] = extractelement <8 x i8> [[TMP132]], i64 7
 ; DISABLED_MASKED_STRIDED-NEXT:    store i8 [[TMP164]], ptr [[TMP163]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
@@ -2980,13 +2980,13 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IV:%.*]] = or disjoint <8 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[INDEX]], 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]])
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP1]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[Q:%.*]], i32 [[TMP1]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = sub <8 x i8> zeroinitializer, [[TMP4]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 ; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP6]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
index 0d674ca2bc76a9..414394a8942e5a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
@@ -29,13 +29,13 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP2]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS:%.*]], i64 [[TMP2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP4]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP4]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP1]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP6]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP6]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP1]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP8]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP8]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP10]], ptr [[TMP3]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP11:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
@@ -48,13 +48,13 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], splat (i64 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <4 x i64> [[TMP15]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP16]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP16]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = extractelement <4 x i64> [[TMP15]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP18]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP18]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = extractelement <4 x i64> [[TMP15]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP20]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP20]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP22:%.*]] = extractelement <4 x i64> [[TMP15]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP22]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP22]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP24:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP24]], ptr [[TMP17]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP25:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 1
@@ -319,7 +319,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP4]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS:%.*]], i64 [[TMP4]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP6]], ptr [[TMP5]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -328,7 +328,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if1:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP8]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP8]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP10]], ptr [[TMP9]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE2]]
@@ -337,7 +337,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if3:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP12]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP12]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP14]], ptr [[TMP13]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE4]]
@@ -346,7 +346,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if5:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP16]]
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP16]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP18]], ptr [[TMP17]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
@@ -372,7 +372,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP4]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS:%.*]], i64 [[TMP4]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP6]], ptr [[TMP5]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE]]
@@ -381,7 +381,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if1:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP8]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP8]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 1
 ; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP10]], ptr [[TMP9]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE2]]
@@ -390,7 +390,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if3:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP12]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP12]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 2
 ; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP14]], ptr [[TMP13]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE4]]
@@ -399,7 +399,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if5:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP16]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i16, ptr [[POINTS]], i64 [[TMP16]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
 ; ENABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP18]], ptr [[TMP17]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE6]]

diff  --git a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
index f648de1358998e..93ac34003d4b7c 100644
--- a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
+++ b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
@@ -38,7 +38,7 @@ define void @inv_store_last_lane(ptr noalias nocapture %a, ptr noalias nocapture
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[INV:%.*]], i64 168
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[INV:%.*]], i64 168
 ; CHECK-NEXT:    store i32 [[MUL_LCSSA]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index 89ac22b054e24d..41fbf521b7ed9f 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -61,7 +61,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL1-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -102,7 +102,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP5]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -119,7 +119,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -170,7 +170,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC1_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -308,7 +308,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL1-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -349,7 +349,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP5]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -366,7 +366,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -419,7 +419,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC1_INTERL2-NEXT:    store float [[X_05]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -552,7 +552,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL1-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL1-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -586,7 +586,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -603,7 +603,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -653,7 +653,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC1_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC1_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -806,14 +806,14 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL1-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL1-NEXT:    store float [[X_011]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
 ; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
 ; VEC4_INTERL1-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL1-NEXT:    store float [[ADD2]], ptr [[ARRAYIDX4]], align 4
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL1-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX6]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -857,7 +857,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL2-NEXT:    [[VEC_IND10:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD11:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND10]], ptr [[TMP6]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD11]], ptr [[TMP7]], align 4
 ; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST]]
@@ -867,11 +867,11 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[TMP8]]
 ; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP9]]
 ; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP12]], ptr [[TMP14]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP13]], ptr [[TMP15]], align 4
 ; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP16]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP10]], ptr [[TMP16]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP11]], ptr [[TMP17]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -891,14 +891,14 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL2-NEXT:    store float [[X_011]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
 ; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
 ; VEC4_INTERL2-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL2-NEXT:    store float [[ADD2]], ptr [[ARRAYIDX4]], align 4
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL2-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX6]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -969,14 +969,14 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
 ; VEC1_INTERL2-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC1_INTERL2-NEXT:    store float [[X_011]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
 ; VEC1_INTERL2-NEXT:    store float [[ADD2]], ptr [[ARRAYIDX4]], align 4
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]]
 ; VEC1_INTERL2-NEXT:    store float [[CONV1]], ptr [[ARRAYIDX6]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
@@ -1134,7 +1134,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL1-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL1-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -1165,7 +1165,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -1182,7 +1182,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC4_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -1232,7 +1232,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC1_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
 ; VEC1_INTERL2-NEXT:    store float [[X_06]], ptr [[ARRAYIDX]], align 4
 ; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -1372,7 +1372,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1:       for.body:
 ; VEC4_INTERL1-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL1-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL1-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]]
+; VEC4_INTERL1-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[I]]
 ; VEC4_INTERL1-NEXT:    [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4
 ; VEC4_INTERL1-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
 ; VEC4_INTERL1-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
@@ -1400,7 +1400,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ]
 ; VEC4_INTERL2-NEXT:    [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
 ; VEC4_INTERL2-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
@@ -1488,7 +1488,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2:       for.body:
 ; VEC4_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC4_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC4_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]]
+; VEC4_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[I]]
 ; VEC4_INTERL2-NEXT:    [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4
 ; VEC4_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
 ; VEC4_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
@@ -1546,7 +1546,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC1_INTERL2:       for.body:
 ; VEC1_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; VEC1_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; VEC1_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]]
+; VEC1_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[I]]
 ; VEC1_INTERL2-NEXT:    [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4
 ; VEC1_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
 ; VEC1_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]

diff  --git a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
index 00e3e669cd0139..3386e1d8563778 100644
--- a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
@@ -26,19 +26,19 @@ define dso_local void @forked_ptrs_
diff erent_base_same_offset(ptr nocapture read
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[DEST1]], [[PREDS2]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[DEST1]], [[BASE23]]
-; CHECK-NEXT:    [[TMP1_FR:%.*]] = freeze i64 [[TMP1]]
-; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1_FR]], 16
+; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i64 [[TMP1]]
+; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[DOTFR]], 16
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 [[DEST1]], [[BASE15]]
-; CHECK-NEXT:    [[TMP2_FR:%.*]] = freeze i64 [[TMP2]]
-; CHECK-NEXT:    [[DIFF_CHECK7:%.*]] = icmp ult i64 [[TMP2_FR]], 16
-; CHECK-NEXT:    [[CONFLICT_RDX8:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK7]]
-; CHECK-NEXT:    br i1 [[CONFLICT_RDX8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    [[DOTFR10:%.*]] = freeze i64 [[TMP2]]
+; CHECK-NEXT:    [[DIFF_CHECK6:%.*]] = icmp ult i64 [[DOTFR10]], 16
+; CHECK-NEXT:    [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE2]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE1]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT9]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x ptr> poison, ptr [[BASE1]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT8]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -48,7 +48,7 @@ define dso_local void @forked_ptrs_
diff erent_base_same_offset(ptr nocapture read
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[PREDS]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x ptr> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT10]]
+; CHECK-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x ptr> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT9]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP8]], i64 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x ptr> [[TMP8]], i64 1
@@ -78,13 +78,13 @@ define dso_local void @forked_ptrs_
diff erent_base_same_offset(ptr nocapture read
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PREDS]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[PREDS]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP27]], 0
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], ptr [[BASE2]], ptr [[BASE1]]
-; CHECK-NEXT:    [[DOTSINK_IN:%.*]] = getelementptr inbounds float, ptr [[SPEC_SELECT]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[DOTSINK_IN:%.*]] = getelementptr inbounds nuw float, ptr [[SPEC_SELECT]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[DOTSINK:%.*]] = load float, ptr [[DOTSINK_IN]], align 4
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, ptr [[DEST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw float, ptr [[DEST]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store float [[DOTSINK]], ptr [[TMP28]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
@@ -132,7 +132,7 @@ define dso_local void @forked_ptrs_same_base_
diff erent_offset(ptr nocapture read
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PREDS:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[PREDS:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -140,9 +140,9 @@ define dso_local void @forked_ptrs_same_base_
diff erent_offset(ptr nocapture read
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[OFFSET_0:%.*]] = select i1 [[CMP1_NOT]], i32 [[ADD]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[IDXPROM213:%.*]] = zext i32 [[OFFSET_0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[BASE:%.*]], i64 [[IDXPROM213]]
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[BASE:%.*]], i64 [[IDXPROM213]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DEST:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw float, ptr [[DEST:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store float [[TMP2]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]

diff  --git a/llvm/test/Transforms/LoopVectorize/histograms.ll b/llvm/test/Transforms/LoopVectorize/histograms.ll
index 5f0e3b1e10d3ea..1adc0bf0c9ec07 100644
--- a/llvm/test/Transforms/LoopVectorize/histograms.ll
+++ b/llvm/test/Transforms/LoopVectorize/histograms.ll
@@ -10,10 +10,10 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[INDICES]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP13]], 1
 ; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX2]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index d6eb4264c0a0d7..2e08e7eb4df62e 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -112,7 +112,7 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -203,7 +203,7 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 190, i32 191, i32 192, i32 193>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16
+; INTERLEAVE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -408,12 +408,12 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
-; UNROLL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 8
+; UNROLL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 8
 ; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
 ; UNROLL-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]]
 ; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
-; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 8
+; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i64 8
 ; UNROLL-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP9]], align 4, !alias.scope [[META7]]
 ; UNROLL-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP10]], align 4, !alias.scope [[META7]]
 ; UNROLL-NEXT:    [[TMP11:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD5]]
@@ -546,12 +546,12 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
-; INTERLEAVE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
+; INTERLEAVE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4]], !noalias [[META7]]
 ; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
-; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 16
+; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP9]], i64 16
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META7]]
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP10]], align 4, !alias.scope [[META7]]
 ; INTERLEAVE-NEXT:    [[TMP11:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD5]]
@@ -688,7 +688,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) {
 ; IND:       for.body:
 ; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; IND-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; IND-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]]
+; IND-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[I]]
 ; IND-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
 ; IND-NEXT:    [[TMP6]] = add i64 [[TMP5]], [[SUM]]
 ; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -711,7 +711,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) {
 ; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
 ; UNROLL-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
 ; UNROLL-NEXT:    [[TMP2]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -731,7 +731,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) {
 ; UNROLL:       for.body:
 ; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP8:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]]
+; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[I]]
 ; UNROLL-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
 ; UNROLL-NEXT:    [[TMP8]] = add i64 [[TMP7]], [[SUM]]
 ; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -800,7 +800,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) {
 ; INTERLEAVE-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
+; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 32
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
 ; INTERLEAVE-NEXT:    [[TMP2]] = add <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -820,7 +820,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) {
 ; INTERLEAVE:       for.body:
 ; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; INTERLEAVE-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP8:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]]
+; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[I]]
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8
 ; INTERLEAVE-NEXT:    [[TMP8]] = add i64 [[TMP7]], [[SUM]]
 ; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -1320,7 +1320,7 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
 ; IND-NEXT:    br label [[FOR_BODY:%.*]]
 ; IND:       for.body:
 ; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; IND-NEXT:    [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
+; IND-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
 ; IND-NEXT:    [[TMP11:%.*]] = load i32, ptr [[F]], align 8
 ; IND-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP11]], [[Y]]
 ; IND-NEXT:    store i32 [[TMP12]], ptr [[F]], align 8
@@ -1378,7 +1378,7 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
 ; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
 ; UNROLL:       for.body:
 ; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; UNROLL-NEXT:    [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
+; UNROLL-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
 ; UNROLL-NEXT:    [[TMP22:%.*]] = load i32, ptr [[F]], align 8
 ; UNROLL-NEXT:    [[TMP23:%.*]] = xor i32 [[TMP22]], [[Y]]
 ; UNROLL-NEXT:    store i32 [[TMP23]], ptr [[F]], align 8
@@ -1661,9 +1661,9 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; IND:       for.body:
 ; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; IND-NEXT:    [[DOTIDX:%.*]] = shl nsw i64 [[I]], 4
-; IND-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[DOTIDX]]
+; IND-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[DOTIDX]]
 ; IND-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 1
-; IND-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
+; IND-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
 ; IND-NEXT:    store i32 [[TMP21]], ptr [[TMP22]], align 1
 ; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; IND-NEXT:    [[TMP23:%.*]] = trunc i64 [[I_NEXT]] to i32
@@ -1738,9 +1738,9 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; UNROLL:       for.body:
 ; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT:    [[DOTIDX:%.*]] = shl nsw i64 [[I]], 4
-; UNROLL-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[DOTIDX]]
+; UNROLL-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[DOTIDX]]
 ; UNROLL-NEXT:    [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 1
-; UNROLL-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
+; UNROLL-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
 ; UNROLL-NEXT:    store i32 [[TMP32]], ptr [[TMP33]], align 1
 ; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; UNROLL-NEXT:    [[TMP34:%.*]] = trunc i64 [[I_NEXT]] to i32
@@ -2073,7 +2073,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
 ; IND-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
 ; IND-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
 ; IND-NEXT:    [[TMP16:%.*]] = zext nneg i32 [[I]] to i64
-; IND-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
+; IND-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP16]]
 ; IND-NEXT:    [[VAR1:%.*]] = load i32, ptr [[VAR0]], align 4
 ; IND-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; IND:       if.then:
@@ -2105,7 +2105,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
 ; UNROLL-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE8]] ]
 ; UNROLL-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
 ; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
 ; UNROLL-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
 ; UNROLL-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
@@ -2163,7 +2163,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
 ; UNROLL-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
 ; UNROLL-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
 ; UNROLL-NEXT:    [[TMP26:%.*]] = zext nneg i32 [[I]] to i64
-; UNROLL-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP26]]
+; UNROLL-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP26]]
 ; UNROLL-NEXT:    [[VAR1:%.*]] = load i32, ptr [[VAR0]], align 4
 ; UNROLL-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; UNROLL:       if.then:
@@ -2292,7 +2292,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
 ; INTERLEAVE-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE16]] ]
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
@@ -2386,7 +2386,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) {
 ; INTERLEAVE-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
 ; INTERLEAVE-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
 ; INTERLEAVE-NEXT:    [[TMP46:%.*]] = zext nneg i32 [[I]] to i64
-; INTERLEAVE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP46]]
+; INTERLEAVE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP46]]
 ; INTERLEAVE-NEXT:    [[VAR1:%.*]] = load i32, ptr [[VAR0]], align 4
 ; INTERLEAVE-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
 ; INTERLEAVE:       if.then:
@@ -2527,7 +2527,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; IND-NEXT:    [[TMP11:%.*]] = trunc i64 [[I]] to i32
 ; IND-NEXT:    [[TMP12:%.*]] = add i32 [[A]], [[TMP11]]
 ; IND-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
-; IND-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[I]], i32 1
+; IND-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw [[PAIR_I16]], ptr [[P]], i64 [[I]], i32 1
 ; IND-NEXT:    store i16 [[TMP13]], ptr [[TMP14]], align 2
 ; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; IND-NEXT:    [[TMP15:%.*]] = trunc i64 [[I_NEXT]] to i32
@@ -2586,7 +2586,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; UNROLL-NEXT:    [[TMP19:%.*]] = trunc i64 [[I]] to i32
 ; UNROLL-NEXT:    [[TMP20:%.*]] = add i32 [[A]], [[TMP19]]
 ; UNROLL-NEXT:    [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16
-; UNROLL-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[I]], i32 1
+; UNROLL-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw [[PAIR_I16]], ptr [[P]], i64 [[I]], i32 1
 ; UNROLL-NEXT:    store i16 [[TMP21]], ptr [[TMP22]], align 2
 ; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; UNROLL-NEXT:    [[TMP23:%.*]] = trunc i64 [[I_NEXT]] to i32
@@ -2722,7 +2722,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; INTERLEAVE-NEXT:    [[TMP31:%.*]] = trunc i64 [[I]] to i32
 ; INTERLEAVE-NEXT:    [[TMP32:%.*]] = add i32 [[A]], [[TMP31]]
 ; INTERLEAVE-NEXT:    [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16
-; INTERLEAVE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[I]], i32 1
+; INTERLEAVE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw [[PAIR_I16]], ptr [[P]], i64 [[I]], i32 1
 ; INTERLEAVE-NEXT:    store i16 [[TMP33]], ptr [[TMP34]], align 2
 ; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; INTERLEAVE-NEXT:    [[TMP35:%.*]] = trunc i64 [[I_NEXT]] to i32
@@ -3595,7 +3595,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; UNROLL-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
 ; UNROLL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 8
+; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -3737,7 +3737,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
 ; INTERLEAVE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
+; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -3981,7 +3981,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; UNROLL-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
 ; UNROLL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 8
+; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -4129,7 +4129,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
 ; INTERLEAVE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
-; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
+; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -4277,7 +4277,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -4352,7 +4352,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -4493,7 +4493,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
 ; UNROLL-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
 ; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
-; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -4583,7 +4583,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; INTERLEAVE-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
-; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -4728,7 +4728,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
 ; UNROLL-NEXT:    [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
 ; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
-; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
+; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -4815,7 +4815,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
 ; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
-; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -4924,7 +4924,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; IND:       for.body:
 ; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; IND-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; IND-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
+; IND-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[I]]
 ; IND-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
 ; IND-NEXT:    store i32 [[VAR1]], ptr [[VAR0]], align 4
 ; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -4948,7 +4948,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -4965,7 +4965,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; UNROLL:       for.body:
 ; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; UNROLL-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; UNROLL-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
+; UNROLL-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[I]]
 ; UNROLL-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
 ; UNROLL-NEXT:    store i32 [[VAR1]], ptr [[VAR0]], align 4
 ; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -5033,7 +5033,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -5050,7 +5050,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; INTERLEAVE:       for.body:
 ; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; INTERLEAVE-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; INTERLEAVE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
+; INTERLEAVE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[I]]
 ; INTERLEAVE-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
 ; INTERLEAVE-NEXT:    store i32 [[VAR1]], ptr [[VAR0]], align 4
 ; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -6370,7 +6370,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; UNROLL-NEXT:    [[TMP18:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NEXT:    [[TMP19:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 8
+; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[TMP19]], ptr [[TMP21]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -6526,7 +6526,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; INTERLEAVE-NEXT:    [[TMP18:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; INTERLEAVE-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; INTERLEAVE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 16
+; INTERLEAVE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8

diff  --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
index a7f3057935b6de..7bbf750f1702bb 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll
@@ -55,7 +55,7 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i8 [[DOTCAST3]], 1
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[X]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = zext i8 [[OFFSET_IDX]] to i64
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP14]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], splat (i32 1)
@@ -78,7 +78,7 @@ define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapt
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[INDEX_011:%.*]] = phi i8 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[INDEX_011]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = shl i32 [[TMP27]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]

diff  --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index dc4fb0f15c6447..16ba3b26e68ae3 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -423,7 +423,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[INDEX]], 9223372036854775804
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 508
@@ -436,11 +436,11 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 1016, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 1022
@@ -499,7 +499,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP7]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -517,7 +517,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]]

diff  --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
index 45de11141235e6..f5c0d8bb85836e 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll
@@ -59,7 +59,7 @@ define void @inv_val_store_to_inv_address_conditional_
diff _values_ic(ptr %a, i64
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I2]], [[K]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
@@ -157,7 +157,7 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
 ; CHECK-NEXT:    br i1 [[CMP]], label [[COND_STORE:%.*]], label [[COND_STORE_K:%.*]]
 ; CHECK:       cond_store:
@@ -247,7 +247,7 @@ define i32 @variant_val_store_to_inv_address(ptr %a, i64 %n, ptr %b, i32 %k) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I0:%.*]] = phi i32 [ [[I3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
 ; CHECK-NEXT:    store i32 [[I2]], ptr [[A]], align 4
 ; CHECK-NEXT:    [[I3]] = add i32 [[I0]], [[I2]]

diff  --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 1473d292d06ac9..689d00e1d01863 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -56,7 +56,7 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b)
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I0:%.*]] = phi i32 [ [[I3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
 ; CHECK-NEXT:    [[I3]] = add i32 [[I0]], [[I2]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4
@@ -126,7 +126,7 @@ define void @inv_val_store_to_inv_address(ptr %a, i64 %n, ptr %b) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -228,7 +228,7 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I2]], [[K]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
@@ -282,7 +282,7 @@ define void @inv_val_store_to_inv_address_conditional_
diff _values(ptr %a, i64 %n
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[I]]
 ; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I2]], [[K:%.*]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
@@ -356,7 +356,7 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]]
 ; CHECK-NEXT:    br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]]
 ; CHECK:       for.body3.lr.ph:
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR1]], i64 [[INDVARS_IV23]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[J_022]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[J_022]], -1
@@ -487,14 +487,14 @@ define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocaptu
 ; CHECK-NEXT:    [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]]
 ; CHECK-NEXT:    br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]]
 ; CHECK:       for.body3.lr.ph:
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[J_022]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[ADD]], 42
@@ -591,12 +591,12 @@ define void @unsafe_dep_uniform_load_store(i32 %arg, i32 %arg1, i64 %arg2, ptr %
 ; CHECK-NEXT:    [[I15:%.*]] = trunc i64 [[I8]] to i32
 ; CHECK-NEXT:    [[I16:%.*]] = add i32 [[ARG:%.*]], [[I15]]
 ; CHECK-NEXT:    [[I17:%.*]] = zext i32 [[I16]] to i64
-; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds i16, ptr [[I6]], i64 [[I17]]
+; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds nuw i16, ptr [[I6]], i64 [[I17]]
 ; CHECK-NEXT:    store i16 [[I14]], ptr [[I18]], align 2
 ; CHECK-NEXT:    [[I19:%.*]] = add i32 [[I13]], [[I9]]
 ; CHECK-NEXT:    [[I20:%.*]] = trunc i32 [[I19]] to i16
 ; CHECK-NEXT:    [[I21:%.*]] = and i16 [[I20]], 255
-; CHECK-NEXT:    [[I22:%.*]] = getelementptr inbounds i16, ptr [[ARG3]], i64 [[I17]]
+; CHECK-NEXT:    [[I22:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG3]], i64 [[I17]]
 ; CHECK-NEXT:    store i16 [[I21]], ptr [[I22]], align 2
 ; CHECK-NEXT:    [[I23]] = add nsw i32 [[I9]], 1
 ; CHECK-NEXT:    [[I24]] = add nuw nsw i64 [[I8]], 1

diff  --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
index 0bffac24227571..ffeb3b19d11c4d 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
@@ -32,8 +32,8 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
-; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[I]]
 ; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
@@ -206,7 +206,7 @@ define void @no_gep_or_bitcast(ptr noalias %a, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[I]]
 ; CHECK-NEXT:    [[VAR1:%.*]] = load ptr, ptr [[VAR0]], align 8
 ; CHECK-NEXT:    store i32 0, ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
index 1d35a18c98faea..81163c45abe952 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
@@ -16,9 +16,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 32
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index fe74a7c3a9b27c..6315ce541b38bf 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -1037,9 +1037,9 @@ define float @reduction_fmuladd_recurrence_first_arg(ptr %a, ptr %b, i64 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[SUM_07]], float [[TMP0]], float [[TMP1]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -1075,9 +1075,9 @@ define float @reduction_fmuladd_recurrence_second_arg(ptr %a, ptr %b, i64 %n) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[IV]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[SUM_07]], float [[TMP1]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1

diff  --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index bbdff969ebddde..b60a8e8da7f35b 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -1141,7 +1141,7 @@ define void @reduction_reset(i32 %N, ptr %arrayA, ptr %arrayB) {
 ; CHECK:       .lr.ph:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[DOTLR_PH_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ]
 ; CHECK-NEXT:    [[DOT017:%.*]] = phi i32 [ 100, [[DOTLR_PH_PREHEADER]] ], [ [[CSEL:%.*]], [[DOTLR_PH]] ]
-; CHECK-NEXT:    [[C6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYA]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[C6:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYA]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[C7:%.*]] = load i32, ptr [[C6]], align 4
 ; CHECK-NEXT:    [[C8:%.*]] = icmp sgt i32 [[C7]], 0
 ; CHECK-NEXT:    [[C9:%.*]] = add nsw i32 [[C7]], [[DOT017]]

diff  --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index ccf02b96abc9d2..5c817ea3131832 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -238,7 +238,7 @@ define void @test_runtime_check2(ptr %a, float %b, i64 %offset, i64 %offset2, i6
 ; CHECK-NEXT:    [[C_IDX:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -4
 ; CHECK-NEXT:    [[LC:%.*]] = load float, ptr [[C_IDX]], align 4
 ; CHECK-NEXT:    [[VC:%.*]] = fadd float [[LC]], 1.000000e+00
-; CHECK-NEXT:    [[C_IDX2:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[IV]]
+; CHECK-NEXT:    [[C_IDX2:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[IV]]
 ; CHECK-NEXT:    store float [[VC]], ptr [[C_IDX2]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]

diff  --git a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
index c425d21612fd13..68b4260c037372 100644
--- a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
@@ -11,7 +11,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; CHECK:   %[[T2:.+]] = add nuw nsw i64 [[OFFSET_IDX]], %tmp0
 ; CHECK:   %[[T3:.+]] = sub nsw i64 %[[T2]], %x
 ; CHECK:   %[[T4:.+]] = getelementptr inbounds i32, ptr %a, i64 %[[T3]]
-; CHECK:   %[[T6:.+]] = getelementptr inbounds i8, ptr %[[T4]], i64 16
+; CHECK:   %[[T6:.+]] = getelementptr inbounds nuw i8, ptr %[[T4]], i64 16
 ; CHECK:   load <4 x i32>, ptr %[[T4]], align 4
 ; CHECK:   load <4 x i32>, ptr %[[T6]], align 4
 ; CHECK:   br {{.*}}, label %middle.block, label %vector.body

diff  --git a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
index 04437079121dc4..3afd8553f8a7e9 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll
@@ -9,23 +9,23 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
-; CHECK-NEXT:    [[TMP3]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP2]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]])
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i8 [[AND_LCSSA_OFF0]]
 ;
 entry:
@@ -56,23 +56,23 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP3]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP2]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP2]])
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
 ;
 entry:
@@ -103,23 +103,23 @@ define i16 @reduction_xor_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT:    [[TMP3]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP2]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP2]])
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
 ;
 entry:
@@ -144,9 +144,24 @@ for.end:
 
 define i8 @reduction_smin_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_smin_trunc(
-; CHECK-NOT: vector.body
-; CHECK-NOT: <8 x
-; CHECK: ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 256, [[ENTRY]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[EXT:%.*]] = sext i8 [[LOAD]] to i32
+; CHECK-NEXT:    [[MIN]] = call i32 @llvm.smin.i32(i32 [[SUM_02]], i32 [[EXT]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[MIN]] to i8
+; CHECK-NEXT:    ret i8 [[RET]]
+;
 entry:
   br label %for.body
 
@@ -170,9 +185,24 @@ for.end:
 
 define i8 @reduction_umin_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_umin_trunc(
-; CHECK-NOT: vector.body
-; CHECK-NOT: <8 x
-; CHECK: ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[LOAD]] to i32
+; CHECK-NEXT:    [[MIN]] = call i32 @llvm.umin.i32(i32 [[SUM_02]], i32 [[EXT]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[RET:%.*]] = trunc nuw i32 [[MIN]] to i8
+; CHECK-NEXT:    ret i8 [[RET]]
+;
 entry:
   br label %for.body
 
@@ -196,9 +226,24 @@ for.end:
 
 define i16 @reduction_smax_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_smax_trunc(
-; CHECK-NOT: vector.body
-; CHECK-NOT: <8 x
-; CHECK: ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[EXT:%.*]] = sext i16 [[LOAD]] to i32
+; CHECK-NEXT:    [[MIN]] = call i32 @llvm.smax.i32(i32 [[SUM_02]], i32 [[EXT]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[MIN]] to i16
+; CHECK-NEXT:    ret i16 [[RET]]
+;
 entry:
   br label %for.body
 
@@ -222,9 +267,24 @@ for.end:
 
 define i16 @reduction_umax_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-LABEL: @reduction_umax_trunc(
-; CHECK-NOT: vector.body
-; CHECK-NOT: <8 x
-; CHECK: ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[MIN:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[IV]] to i64
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[EXT:%.*]] = zext i16 [[LOAD]] to i32
+; CHECK-NEXT:    [[MIN]] = call i32 @llvm.umax.i32(i32 [[SUM_02]], i32 [[EXT]])
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[RET:%.*]] = trunc nuw i32 [[MIN]] to i16
+; CHECK-NEXT:    ret i16 [[RET]]
+;
 entry:
   br label %for.body
 
@@ -273,7 +333,7 @@ define i32 @reduction_and_or(i16 %a, i32 %b, ptr %src) {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 992, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[OR67:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg i32 [[IV]] to i64
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP5]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[OR]] = or i32 [[OR67]], [[L]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1

diff  --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
index e307d93b1896a8..94bc32205ec11f 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
@@ -31,8 +31,8 @@ define void @vector_gep_stored(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
-; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[I]]
 ; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
@@ -65,7 +65,7 @@ define void @uniform_vector_gep_stored(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 4
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -84,8 +84,8 @@ define void @uniform_vector_gep_stored(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 4
-; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
+; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 4
+; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[I]]
 ; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]

diff  --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
index eec772c52bbb69..3f9bd9333805f3 100644
--- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
+++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
@@ -14,7 +14,7 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture
 ; CHECK-NEXT:    [[CMP14:%.*]] = icmp eq i32 [[ITR:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP14]], label [[FOR_END13:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader.preheader:
-; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 4
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ITR]], -1
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader:
@@ -22,25 +22,25 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture
 ; CHECK-NEXT:    [[J_016:%.*]] = phi i32 [ [[J_1_LCSSA:%.*]], [[FOR_INC11]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
 ; CHECK-NEXT:    [[I_015:%.*]] = phi i32 [ [[INC12:%.*]], [[FOR_INC11]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[INDVAR]], 2
-; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[VAR3:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[VAR3:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], 4
-; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[VAR3]], i64 [[TMP2]]
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[VAR3]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[CMP212:%.*]] = icmp ult i32 [[J_016]], [[ITR]]
 ; CHECK-NEXT:    br i1 [[CMP212]], label [[FOR_BODY3_LVER_CHECK:%.*]], label [[FOR_INC11]]
 ; CHECK:       for.body3.lver.check:
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[I_015]], [[ITR]]
 ; CHECK-NEXT:    [[IDXPROM6:%.*]] = zext i32 [[I_015]] to i64
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[VAR3]], i64 [[IDXPROM6]]
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR3]], i64 [[IDXPROM6]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[J_016]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
-; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[VAR1]], i64 [[TMP4]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[VAR1]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 [[TMP0]], [[J_016]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[TMP4]], [[TMP7]]
-; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[UGLYGEP1]], i64 [[TMP8]]
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[UGLYGEP]], [[UGLYGEP4]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[UGLYGEP3]], [[UGLYGEP2]]
+; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[TMP8]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP4]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP3]], [[SCEVGEP2]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[FOR_BODY3_PH_LVER_ORIG:%.*]], label [[FOR_BODY3_PH:%.*]]
 ; CHECK:       for.body3.ph.lver.orig:
@@ -48,7 +48,7 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture
 ; CHECK:       for.body3.lver.orig:
 ; CHECK-NEXT:    [[J_113_LVER_ORIG:%.*]] = phi i32 [ [[J_016]], [[FOR_BODY3_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY3_LVER_ORIG]] ]
 ; CHECK-NEXT:    [[IDXPROM_LVER_ORIG:%.*]] = zext i32 [[J_113_LVER_ORIG]] to i64
-; CHECK-NEXT:    [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[IDXPROM_LVER_ORIG]]
+; CHECK-NEXT:    [[ARRAYIDX_LVER_ORIG:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR1]], i64 [[IDXPROM_LVER_ORIG]]
 ; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX_LVER_ORIG]], align 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT:    [[ADD8_LVER_ORIG:%.*]] = add nsw i32 [[TMP9]], [[ADD]]
@@ -57,14 +57,14 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture
 ; CHECK-NEXT:    [[CMP2_LVER_ORIG:%.*]] = icmp ult i32 [[INC_LVER_ORIG]], [[ITR]]
 ; CHECK-NEXT:    br i1 [[CMP2_LVER_ORIG]], label [[FOR_BODY3_LVER_ORIG]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       for.body3.ph:
-; CHECK-NEXT:    [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope !2, !noalias !2
+; CHECK-NEXT:    [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2:![0-9]+]], !noalias [[META2]]
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
 ; CHECK-NEXT:    [[ADD86:%.*]] = phi i32 [ [[ARRAYIDX7_PROMOTED]], [[FOR_BODY3_PH]] ], [ [[ADD8:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[J_113:%.*]] = phi i32 [ [[J_016]], [[FOR_BODY3_PH]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[J_113]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[IDXPROM]]
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope !2, !noalias !2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR1]], i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope [[META2]], !noalias [[META2]]
 ; CHECK-NEXT:    [[ADD8]] = add nsw i32 [[ADD86]], [[ADD]]
 ; CHECK-NEXT:    [[INC]] = add nuw i32 [[J_113]], 1
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[INC]], [[ITR]]
@@ -73,7 +73,7 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture
 ; CHECK-NEXT:    br label [[FOR_INC11_LOOPEXIT:%.*]]
 ; CHECK:       for.inc11.loopexit.loopexit5:
 ; CHECK-NEXT:    [[ADD8_LCSSA:%.*]] = phi i32 [ [[ADD8]], [[FOR_BODY3]] ]
-; CHECK-NEXT:    store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope !2, !noalias !2
+; CHECK-NEXT:    store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2]], !noalias [[META2]]
 ; CHECK-NEXT:    br label [[FOR_INC11_LOOPEXIT]]
 ; CHECK:       for.inc11.loopexit:
 ; CHECK-NEXT:    br label [[FOR_INC11]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
index f44e39e82d6063..4423b89d815656 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
@@ -30,7 +30,7 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI11:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -49,7 +49,7 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER13]] ]
 ; CHECK-NEXT:    [[SUM_07:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[SUM_07_PH]], [[FOR_BODY_PREHEADER13]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD]] = add nsw i32 [[TMP8]], [[SUM_07]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -145,7 +145,7 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[S_COERCE0]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -225,7 +225,7 @@ define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_acces
 ; CHECK-LABEL: define hidden noundef nonnull align 4 dereferenceable(4) ptr @span_checked_access(
 ; CHECK-SAME: ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) [[THIS:%.*]], i64 noundef [[__IDX:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__SIZE__I:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8
+; CHECK-NEXT:    [[__SIZE__I:%.*]] = getelementptr inbounds nuw i8, ptr [[THIS]], i64 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[__SIZE__I]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[__IDX]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[COND_END:%.*]], label [[COND_FALSE:%.*]], !prof [[PROF4]]
@@ -270,7 +270,7 @@ define hidden noundef i64 @span_access(ptr noundef nonnull align 8 dereferenceab
 ; CHECK-LABEL: define hidden noundef i64 @span_access(
 ; CHECK-SAME: ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__SIZE_:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8
+; CHECK-NEXT:    [[__SIZE_:%.*]] = getelementptr inbounds nuw i8, ptr [[THIS]], i64 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[__SIZE_]], align 8
 ; CHECK-NEXT:    ret i64 [[TMP0]]
 ;

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
index 4a5ce1359d2571..89b2f039c827d9 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -49,7 +49,7 @@ define void @loop(ptr %X, ptr %Y) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
@@ -61,7 +61,7 @@ define void @loop(ptr %X, ptr %Y) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x double> zeroinitializer, <2 x double> [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP11]], i64 16
 ; CHECK-NEXT:    store <2 x double> [[TMP9]], ptr [[TMP11]], align 8
 ; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[TMP12]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -71,13 +71,13 @@ define void @loop(ptr %X, ptr %Y) {
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt double [[TMP14]], 0.000000e+00
 ; CHECK-NEXT:    [[CMP1_I:%.*]] = fcmp ogt double [[TMP14]], 6.000000e+00
 ; CHECK-NEXT:    [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP14]]
 ; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store double [[RETVAL_0_I]], ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20000
@@ -149,13 +149,13 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], splat (i32 20)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope [[META7]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]]
@@ -168,7 +168,7 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD11]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[TMP6]], <4 x float> [[TMP10]]
 ; CHECK-NEXT:    [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP11]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -176,13 +176,13 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       loop.body:
 ; CHECK-NEXT:    [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV1]]
+; CHECK-NEXT:    [[C_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i64 [[IV1]]
 ; CHECK-NEXT:    [[C_LV:%.*]] = load i32, ptr [[C_GEP]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C_LV]], 20
-; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV1]]
+; CHECK-NEXT:    [[A_GEP_0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV1]]
 ; CHECK-NEXT:    [[A_LV_0:%.*]] = load float, ptr [[A_GEP_0]], align 4
 ; CHECK-NEXT:    [[MUL2_I81_I:%.*]] = fmul float [[X]], [[A_LV_0]]
-; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV1]]
+; CHECK-NEXT:    [[B_GEP_0:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[IV1]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[ELSE:%.*]]
 ; CHECK:       else:
 ; CHECK-NEXT:    [[B_LV:%.*]] = load float, ptr [[B_GEP_0]], align 4

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
index af24a9ab9e3f7e..579961f546c0b4 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll
@@ -58,11 +58,11 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
 ; CHECK-NEXT:    [[TMP19:%.*]] = mul nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[TMP19]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 16
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP21]], align 4, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i64 16
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4, !alias.scope [[META3]], !noalias [[META0]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD11]], [[WIDE_LOAD]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll
index e514defc91e28a..b14a36c8b3bcd0 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll
@@ -18,7 +18,7 @@ define dso_local void @_Z3fooPiii(ptr %A, i32 %N, i32 %M) #0 {
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
 ; CHECK:       for.cond1.preheader.us:
 ; CHECK-NEXT:    [[INDVAR6:%.*]] = phi i64 [ [[INDVAR_NEXT7:%.*]], [[FOR_COND1_PREHEADER_US]] ], [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ]
-; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVAR6]]
+; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVAR6]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
 ; CHECK-NEXT:    tail call void @_Z1fi(i32 [[TMP2]])
 ; CHECK-NEXT:    [[INDVAR_NEXT7]] = add nuw i64 [[INDVAR6]], 1

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
index 5aee938f5191f2..ff9ad9ffa14080 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -12,16 +12,16 @@ define void @matrix_extract_insert_scalar(i32 %i, i32 %k, i32 %j, ptr nonnull al
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp samesign ult i64 [[TMP1]], 225
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP2]])
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A:%.*]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[MATRIXEXT:%.*]] = load double, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[CONV2:%.*]] = zext i32 [[I:%.*]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp samesign ult i64 [[TMP4]], 225
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP5]])
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds <225 x double>, ptr [[B:%.*]], i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B:%.*]], i64 0, i64 [[TMP4]]
 ; CHECK-NEXT:    [[MATRIXEXT4:%.*]] = load double, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[MATRIXEXT]], [[MATRIXEXT4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[MATRIXEXT7:%.*]] = load double, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
 ; CHECK-NEXT:    store double [[SUB]], ptr [[TMP7]], align 8
@@ -93,7 +93,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[I]], 225
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP2]])
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[CONV6]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[CONV6]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i32 [[I]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
@@ -124,7 +124,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP12]], i64 1
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP16]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 16
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP17]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP17]], align 8, !alias.scope [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD21:%.*]] = load <2 x double>, ptr [[TMP18]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = load double, ptr [[TMP3]], align 8, !alias.scope [[META3:![0-9]+]]
@@ -133,7 +133,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP20:%.*]] = fmul <2 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT23]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = fmul <2 x double> [[WIDE_LOAD21]], [[BROADCAST_SPLAT23]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i64 16
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD24:%.*]] = load <2 x double>, ptr [[TMP22]], align 8, !alias.scope [[META5:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD25:%.*]] = load <2 x double>, ptr [[TMP23]], align 8, !alias.scope [[META5]], !noalias [[META0]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = fsub <2 x double> [[WIDE_LOAD24]], [[TMP20]]
@@ -153,11 +153,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY4_US_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 225
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP27]])
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[MATRIXEXT_US:%.*]] = load double, ptr [[TMP28]], align 8
 ; CHECK-NEXT:    [[MATRIXEXT8_US:%.*]] = load double, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]]
-; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[MATRIXEXT11_US:%.*]] = load double, ptr [[TMP29]], align 8
 ; CHECK-NEXT:    [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]]
 ; CHECK-NEXT:    store double [[SUB_US]], ptr [[TMP29]], align 8
@@ -168,7 +168,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP30:%.*]] = add nuw nsw i64 [[CONV6]], 15
 ; CHECK-NEXT:    [[TMP31:%.*]] = icmp samesign ult i32 [[I]], 210
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP31]])
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP30]]
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP30]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK_1:%.*]] = icmp samesign ult i32 [[I]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK_1]], label [[FOR_BODY4_US_PREHEADER_1:%.*]], label [[VECTOR_MEMCHECK_1:%.*]]
 ; CHECK:       vector.memcheck.1:
@@ -200,7 +200,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <2 x i1> [[TMP42]], i64 1
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP46]])
 ; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP33]]
-; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[TMP47]], i64 16
+; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP47]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr [[TMP47]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD21_1:%.*]] = load <2 x double>, ptr [[TMP48]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP49:%.*]] = load double, ptr [[TMP32]], align 8, !alias.scope [[META3]]
@@ -209,7 +209,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP50:%.*]] = fmul <2 x double> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT23_1]]
 ; CHECK-NEXT:    [[TMP51:%.*]] = fmul <2 x double> [[WIDE_LOAD21_1]], [[BROADCAST_SPLAT23_1]]
 ; CHECK-NEXT:    [[TMP52:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP33]]
-; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[TMP52]], i64 16
+; CHECK-NEXT:    [[TMP53:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP52]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD24_1:%.*]] = load <2 x double>, ptr [[TMP52]], align 8, !alias.scope [[META5]], !noalias [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD25_1:%.*]] = load <2 x double>, ptr [[TMP53]], align 8, !alias.scope [[META5]], !noalias [[META0]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = fsub <2 x double> [[WIDE_LOAD24_1]], [[TMP50]]
@@ -230,11 +230,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP57:%.*]] = add nuw nsw i64 [[INDVARS_IV_1]], 15
 ; CHECK-NEXT:    [[TMP58:%.*]] = icmp samesign ult i64 [[INDVARS_IV_1]], 210
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP58]])
-; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP57]]
+; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP57]]
 ; CHECK-NEXT:    [[MATRIXEXT_US_1:%.*]] = load double, ptr [[TMP59]], align 8
 ; CHECK-NEXT:    [[MATRIXEXT8_US_1:%.*]] = load double, ptr [[TMP32]], align 8
 ; CHECK-NEXT:    [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]]
-; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP57]]
+; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP57]]
 ; CHECK-NEXT:    [[MATRIXEXT11_US_1:%.*]] = load double, ptr [[TMP60]], align 8
 ; CHECK-NEXT:    [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]]
 ; CHECK-NEXT:    store double [[SUB_US_1]], ptr [[TMP60]], align 8
@@ -245,7 +245,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP61:%.*]] = add nuw nsw i64 [[CONV6]], 30
 ; CHECK-NEXT:    [[TMP62:%.*]] = icmp samesign ult i32 [[I]], 195
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP62]])
-; CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP61]]
+; CHECK-NEXT:    [[TMP63:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP61]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK_2:%.*]] = icmp samesign ult i32 [[I]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK_2]], label [[FOR_BODY4_US_PREHEADER_2:%.*]], label [[VECTOR_MEMCHECK_2:%.*]]
 ; CHECK:       vector.memcheck.2:
@@ -277,7 +277,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <2 x i1> [[TMP73]], i64 1
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP77]])
 ; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP64]]
-; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds i8, ptr [[TMP78]], i64 16
+; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP78]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr [[TMP78]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD21_2:%.*]] = load <2 x double>, ptr [[TMP79]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP80:%.*]] = load double, ptr [[TMP63]], align 8, !alias.scope [[META3]]
@@ -286,7 +286,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP81:%.*]] = fmul <2 x double> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT23_2]]
 ; CHECK-NEXT:    [[TMP82:%.*]] = fmul <2 x double> [[WIDE_LOAD21_2]], [[BROADCAST_SPLAT23_2]]
 ; CHECK-NEXT:    [[TMP83:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP64]]
-; CHECK-NEXT:    [[TMP84:%.*]] = getelementptr inbounds i8, ptr [[TMP83]], i64 16
+; CHECK-NEXT:    [[TMP84:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP83]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD24_2:%.*]] = load <2 x double>, ptr [[TMP83]], align 8, !alias.scope [[META5]], !noalias [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD25_2:%.*]] = load <2 x double>, ptr [[TMP84]], align 8, !alias.scope [[META5]], !noalias [[META0]]
 ; CHECK-NEXT:    [[TMP85:%.*]] = fsub <2 x double> [[WIDE_LOAD24_2]], [[TMP81]]
@@ -307,11 +307,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP88:%.*]] = add nuw nsw i64 [[INDVARS_IV_2]], 30
 ; CHECK-NEXT:    [[TMP89:%.*]] = icmp samesign ult i64 [[INDVARS_IV_2]], 195
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP89]])
-; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP88]]
+; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP88]]
 ; CHECK-NEXT:    [[MATRIXEXT_US_2:%.*]] = load double, ptr [[TMP90]], align 8
 ; CHECK-NEXT:    [[MATRIXEXT8_US_2:%.*]] = load double, ptr [[TMP63]], align 8
 ; CHECK-NEXT:    [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], [[MATRIXEXT8_US_2]]
-; CHECK-NEXT:    [[TMP91:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP88]]
+; CHECK-NEXT:    [[TMP91:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP88]]
 ; CHECK-NEXT:    [[MATRIXEXT11_US_2:%.*]] = load double, ptr [[TMP91]], align 8
 ; CHECK-NEXT:    [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]]
 ; CHECK-NEXT:    store double [[SUB_US_2]], ptr [[TMP91]], align 8
@@ -322,7 +322,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP92:%.*]] = add nuw nsw i64 [[CONV6]], 45
 ; CHECK-NEXT:    [[TMP93:%.*]] = icmp samesign ult i32 [[I]], 180
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP93]])
-; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP92]]
+; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP92]]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK_3:%.*]] = icmp samesign ult i32 [[I]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK_3]], label [[FOR_BODY4_US_PREHEADER_3:%.*]], label [[VECTOR_MEMCHECK_3:%.*]]
 ; CHECK:       vector.memcheck.3:
@@ -354,7 +354,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <2 x i1> [[TMP104]], i64 1
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP108]])
 ; CHECK-NEXT:    [[TMP109:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP95]]
-; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[TMP109]], i64 16
+; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP109]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr [[TMP109]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD21_3:%.*]] = load <2 x double>, ptr [[TMP110]], align 8, !alias.scope [[META0]]
 ; CHECK-NEXT:    [[TMP111:%.*]] = load double, ptr [[TMP94]], align 8, !alias.scope [[META3]]
@@ -363,7 +363,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP112:%.*]] = fmul <2 x double> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT23_3]]
 ; CHECK-NEXT:    [[TMP113:%.*]] = fmul <2 x double> [[WIDE_LOAD21_3]], [[BROADCAST_SPLAT23_3]]
 ; CHECK-NEXT:    [[TMP114:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP95]]
-; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds i8, ptr [[TMP114]], i64 16
+; CHECK-NEXT:    [[TMP115:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP114]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD24_3:%.*]] = load <2 x double>, ptr [[TMP114]], align 8, !alias.scope [[META5]], !noalias [[META0]]
 ; CHECK-NEXT:    [[WIDE_LOAD25_3:%.*]] = load <2 x double>, ptr [[TMP115]], align 8, !alias.scope [[META5]], !noalias [[META0]]
 ; CHECK-NEXT:    [[TMP116:%.*]] = fsub <2 x double> [[WIDE_LOAD24_3]], [[TMP112]]
@@ -384,11 +384,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP119:%.*]] = add nuw nsw i64 [[INDVARS_IV_3]], 45
 ; CHECK-NEXT:    [[TMP120:%.*]] = icmp samesign ult i64 [[INDVARS_IV_3]], 180
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP120]])
-; CHECK-NEXT:    [[TMP121:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP119]]
+; CHECK-NEXT:    [[TMP121:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[A]], i64 0, i64 [[TMP119]]
 ; CHECK-NEXT:    [[MATRIXEXT_US_3:%.*]] = load double, ptr [[TMP121]], align 8
 ; CHECK-NEXT:    [[MATRIXEXT8_US_3:%.*]] = load double, ptr [[TMP94]], align 8
 ; CHECK-NEXT:    [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]]
-; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP119]]
+; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds nuw <225 x double>, ptr [[B]], i64 0, i64 [[TMP119]]
 ; CHECK-NEXT:    [[MATRIXEXT11_US_3:%.*]] = load double, ptr [[TMP122]], align 8
 ; CHECK-NEXT:    [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]]
 ; CHECK-NEXT:    store double [[SUB_US_3]], ptr [[TMP122]], align 8

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
index 97d2247ab13c14..1d7392ec430f8d 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
@@ -28,7 +28,7 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    [[VEC_PHI16:%.*]] = phi <2 x double> [ <double 0.000000e+00, double -0.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI17:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 8
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX1]], i64 8
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD18:%.*]] = load <2 x float>, ptr [[TMP23]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
@@ -68,7 +68,7 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
 ; CHECK-NEXT:    [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
 ; CHECK-NEXT:    [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[TMP0]] to double
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
@@ -214,7 +214,7 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R
 ; CHECK-NEXT:    [[VEC_PHI32:%.*]] = phi <2 x double> [ [[TMP27]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI33:%.*]] = phi <2 x double> [ splat (double -0.000000e+00), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_US1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_US1]], i64 8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_US1]], i64 8
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX_US1]], align 4
 ; CHECK-NEXT:    [[WIDE_LOAD34:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
@@ -255,7 +255,7 @@ define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %R
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY3_US]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
 ; CHECK-NEXT:    [[V1_116_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V1_116_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
 ; CHECK-NEXT:    [[V0_115_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_BODY3_US]] ], [ [[V0_115_US_PH]], %[[FOR_BODY3_US_PREHEADER]] ]
-; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds nuw float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4
 ; CHECK-NEXT:    [[CONV_US:%.*]] = fpext float [[TMP0]] to double
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.exp2.f64(double [[CONV_US]])

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll
index eba0ed2d35fe46..eaced404bba282 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll
@@ -22,19 +22,19 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT27]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 16
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[DCT]], align 2, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD29:%.*]] = load <8 x i16>, ptr [[TMP0]], align 2, !alias.scope [[META0]], !noalias [[META3]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i16> [[WIDE_LOAD29]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD29]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 16
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD30:%.*]] = load <8 x i16>, ptr [[BIAS]], align 2, !alias.scope [[META6:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD31:%.*]] = load <8 x i16>, ptr [[TMP5]], align 2, !alias.scope [[META6]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext <8 x i16> [[WIDE_LOAD30]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <8 x i16> [[WIDE_LOAD31]] to <8 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 16
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD32:%.*]] = load <8 x i16>, ptr [[MF]], align 2, !alias.scope [[META7:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD33:%.*]] = load <8 x i16>, ptr [[TMP8]], align 2, !alias.scope [[META7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext <8 x i16> [[WIDE_LOAD32]] to <8 x i32>
@@ -95,14 +95,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i16 [ [[CONV28]], [[IF_ELSE]] ], [ [[CONV12]], [[IF_THEN]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE]], ptr [[DCT]], align 2
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 2
 ; CHECK-NEXT:    [[TMP34:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 2
 ; CHECK-NEXT:    [[CONV_1:%.*]] = sext i16 [[TMP34]] to i32
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp sgt i16 [[TMP34]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 2
 ; CHECK-NEXT:    [[TMP35:%.*]] = load i16, ptr [[ARRAYIDX4_1]], align 2
 ; CHECK-NEXT:    [[CONV5_1:%.*]] = zext i16 [[TMP35]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_1:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX10_1:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 2
 ; CHECK-NEXT:    [[TMP36:%.*]] = load i16, ptr [[ARRAYIDX10_1]], align 2
 ; CHECK-NEXT:    [[CONV11_1:%.*]] = zext i16 [[TMP36]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_1]], label [[IF_THEN_1:%.*]], label [[IF_ELSE_1:%.*]]
@@ -123,14 +123,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_1:%.*]] = phi i16 [ [[CONV28_1]], [[IF_ELSE_1]] ], [ [[CONV12_1]], [[IF_THEN_1]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_1]], ptr [[ARRAYIDX_1]], align 2
 ; CHECK-NEXT:    [[OR_137:%.*]] = or i16 [[STOREMERGE]], [[STOREMERGE_1]]
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 4
 ; CHECK-NEXT:    [[TMP38:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 2
 ; CHECK-NEXT:    [[CONV_2:%.*]] = sext i16 [[TMP38]] to i32
 ; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp sgt i16 [[TMP38]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_2:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX4_2:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 4
 ; CHECK-NEXT:    [[TMP39:%.*]] = load i16, ptr [[ARRAYIDX4_2]], align 2
 ; CHECK-NEXT:    [[CONV5_2:%.*]] = zext i16 [[TMP39]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX10_2:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 4
 ; CHECK-NEXT:    [[TMP40:%.*]] = load i16, ptr [[ARRAYIDX10_2]], align 2
 ; CHECK-NEXT:    [[CONV11_2:%.*]] = zext i16 [[TMP40]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_2]], label [[IF_THEN_2:%.*]], label [[IF_ELSE_2:%.*]]
@@ -151,14 +151,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_2:%.*]] = phi i16 [ [[CONV28_2]], [[IF_ELSE_2]] ], [ [[CONV12_2]], [[IF_THEN_2]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_2]], ptr [[ARRAYIDX_2]], align 2
 ; CHECK-NEXT:    [[OR_238:%.*]] = or i16 [[OR_137]], [[STOREMERGE_2]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 6
 ; CHECK-NEXT:    [[TMP42:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 2
 ; CHECK-NEXT:    [[CONV_3:%.*]] = sext i16 [[TMP42]] to i32
 ; CHECK-NEXT:    [[CMP1_3:%.*]] = icmp sgt i16 [[TMP42]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_3:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX4_3:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 6
 ; CHECK-NEXT:    [[TMP43:%.*]] = load i16, ptr [[ARRAYIDX4_3]], align 2
 ; CHECK-NEXT:    [[CONV5_3:%.*]] = zext i16 [[TMP43]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_3:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX10_3:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 6
 ; CHECK-NEXT:    [[TMP44:%.*]] = load i16, ptr [[ARRAYIDX10_3]], align 2
 ; CHECK-NEXT:    [[CONV11_3:%.*]] = zext i16 [[TMP44]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_3]], label [[IF_THEN_3:%.*]], label [[IF_ELSE_3:%.*]]
@@ -179,14 +179,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_3:%.*]] = phi i16 [ [[CONV28_3]], [[IF_ELSE_3]] ], [ [[CONV12_3]], [[IF_THEN_3]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_3]], ptr [[ARRAYIDX_3]], align 2
 ; CHECK-NEXT:    [[OR_339:%.*]] = or i16 [[OR_238]], [[STOREMERGE_3]]
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 8
 ; CHECK-NEXT:    [[TMP46:%.*]] = load i16, ptr [[ARRAYIDX_4]], align 2
 ; CHECK-NEXT:    [[CONV_4:%.*]] = sext i16 [[TMP46]] to i32
 ; CHECK-NEXT:    [[CMP1_4:%.*]] = icmp sgt i16 [[TMP46]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_4:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX4_4:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 8
 ; CHECK-NEXT:    [[TMP47:%.*]] = load i16, ptr [[ARRAYIDX4_4]], align 2
 ; CHECK-NEXT:    [[CONV5_4:%.*]] = zext i16 [[TMP47]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_4:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX10_4:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 8
 ; CHECK-NEXT:    [[TMP48:%.*]] = load i16, ptr [[ARRAYIDX10_4]], align 2
 ; CHECK-NEXT:    [[CONV11_4:%.*]] = zext i16 [[TMP48]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_4]], label [[IF_THEN_4:%.*]], label [[IF_ELSE_4:%.*]]
@@ -207,14 +207,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_4:%.*]] = phi i16 [ [[CONV28_4]], [[IF_ELSE_4]] ], [ [[CONV12_4]], [[IF_THEN_4]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_4]], ptr [[ARRAYIDX_4]], align 2
 ; CHECK-NEXT:    [[OR_440:%.*]] = or i16 [[OR_339]], [[STOREMERGE_4]]
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 10
 ; CHECK-NEXT:    [[TMP50:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
 ; CHECK-NEXT:    [[CONV_5:%.*]] = sext i16 [[TMP50]] to i32
 ; CHECK-NEXT:    [[CMP1_5:%.*]] = icmp sgt i16 [[TMP50]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_5:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX4_5:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 10
 ; CHECK-NEXT:    [[TMP51:%.*]] = load i16, ptr [[ARRAYIDX4_5]], align 2
 ; CHECK-NEXT:    [[CONV5_5:%.*]] = zext i16 [[TMP51]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_5:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 10
+; CHECK-NEXT:    [[ARRAYIDX10_5:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 10
 ; CHECK-NEXT:    [[TMP52:%.*]] = load i16, ptr [[ARRAYIDX10_5]], align 2
 ; CHECK-NEXT:    [[CONV11_5:%.*]] = zext i16 [[TMP52]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_5]], label [[IF_THEN_5:%.*]], label [[IF_ELSE_5:%.*]]
@@ -235,14 +235,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_5:%.*]] = phi i16 [ [[CONV28_5]], [[IF_ELSE_5]] ], [ [[CONV12_5]], [[IF_THEN_5]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_5]], ptr [[ARRAYIDX_5]], align 2
 ; CHECK-NEXT:    [[OR_541:%.*]] = or i16 [[OR_440]], [[STOREMERGE_5]]
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 12
 ; CHECK-NEXT:    [[TMP54:%.*]] = load i16, ptr [[ARRAYIDX_6]], align 2
 ; CHECK-NEXT:    [[CONV_6:%.*]] = sext i16 [[TMP54]] to i32
 ; CHECK-NEXT:    [[CMP1_6:%.*]] = icmp sgt i16 [[TMP54]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_6:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX4_6:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 12
 ; CHECK-NEXT:    [[TMP55:%.*]] = load i16, ptr [[ARRAYIDX4_6]], align 2
 ; CHECK-NEXT:    [[CONV5_6:%.*]] = zext i16 [[TMP55]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_6:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 12
+; CHECK-NEXT:    [[ARRAYIDX10_6:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 12
 ; CHECK-NEXT:    [[TMP56:%.*]] = load i16, ptr [[ARRAYIDX10_6]], align 2
 ; CHECK-NEXT:    [[CONV11_6:%.*]] = zext i16 [[TMP56]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_6]], label [[IF_THEN_6:%.*]], label [[IF_ELSE_6:%.*]]
@@ -263,14 +263,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_6:%.*]] = phi i16 [ [[CONV28_6]], [[IF_ELSE_6]] ], [ [[CONV12_6]], [[IF_THEN_6]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_6]], ptr [[ARRAYIDX_6]], align 2
 ; CHECK-NEXT:    [[OR_642:%.*]] = or i16 [[OR_541]], [[STOREMERGE_6]]
-; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 14
 ; CHECK-NEXT:    [[TMP58:%.*]] = load i16, ptr [[ARRAYIDX_7]], align 2
 ; CHECK-NEXT:    [[CONV_7:%.*]] = sext i16 [[TMP58]] to i32
 ; CHECK-NEXT:    [[CMP1_7:%.*]] = icmp sgt i16 [[TMP58]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_7:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX4_7:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 14
 ; CHECK-NEXT:    [[TMP59:%.*]] = load i16, ptr [[ARRAYIDX4_7]], align 2
 ; CHECK-NEXT:    [[CONV5_7:%.*]] = zext i16 [[TMP59]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_7:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 14
+; CHECK-NEXT:    [[ARRAYIDX10_7:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 14
 ; CHECK-NEXT:    [[TMP60:%.*]] = load i16, ptr [[ARRAYIDX10_7]], align 2
 ; CHECK-NEXT:    [[CONV11_7:%.*]] = zext i16 [[TMP60]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_7]], label [[IF_THEN_7:%.*]], label [[IF_ELSE_7:%.*]]
@@ -291,14 +291,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_7:%.*]] = phi i16 [ [[CONV28_7]], [[IF_ELSE_7]] ], [ [[CONV12_7]], [[IF_THEN_7]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_7]], ptr [[ARRAYIDX_7]], align 2
 ; CHECK-NEXT:    [[OR_743:%.*]] = or i16 [[OR_642]], [[STOREMERGE_7]]
-; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 16
+; CHECK-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 16
 ; CHECK-NEXT:    [[TMP62:%.*]] = load i16, ptr [[ARRAYIDX_8]], align 2
 ; CHECK-NEXT:    [[CONV_8:%.*]] = sext i16 [[TMP62]] to i32
 ; CHECK-NEXT:    [[CMP1_8:%.*]] = icmp sgt i16 [[TMP62]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_8:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 16
+; CHECK-NEXT:    [[ARRAYIDX4_8:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 16
 ; CHECK-NEXT:    [[TMP63:%.*]] = load i16, ptr [[ARRAYIDX4_8]], align 2
 ; CHECK-NEXT:    [[CONV5_8:%.*]] = zext i16 [[TMP63]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_8:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 16
+; CHECK-NEXT:    [[ARRAYIDX10_8:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 16
 ; CHECK-NEXT:    [[TMP64:%.*]] = load i16, ptr [[ARRAYIDX10_8]], align 2
 ; CHECK-NEXT:    [[CONV11_8:%.*]] = zext i16 [[TMP64]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_8]], label [[IF_THEN_8:%.*]], label [[IF_ELSE_8:%.*]]
@@ -319,14 +319,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_8:%.*]] = phi i16 [ [[CONV28_8]], [[IF_ELSE_8]] ], [ [[CONV12_8]], [[IF_THEN_8]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_8]], ptr [[ARRAYIDX_8]], align 2
 ; CHECK-NEXT:    [[OR_844:%.*]] = or i16 [[OR_743]], [[STOREMERGE_8]]
-; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 18
+; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 18
 ; CHECK-NEXT:    [[TMP66:%.*]] = load i16, ptr [[ARRAYIDX_9]], align 2
 ; CHECK-NEXT:    [[CONV_9:%.*]] = sext i16 [[TMP66]] to i32
 ; CHECK-NEXT:    [[CMP1_9:%.*]] = icmp sgt i16 [[TMP66]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_9:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 18
+; CHECK-NEXT:    [[ARRAYIDX4_9:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 18
 ; CHECK-NEXT:    [[TMP67:%.*]] = load i16, ptr [[ARRAYIDX4_9]], align 2
 ; CHECK-NEXT:    [[CONV5_9:%.*]] = zext i16 [[TMP67]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_9:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 18
+; CHECK-NEXT:    [[ARRAYIDX10_9:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 18
 ; CHECK-NEXT:    [[TMP68:%.*]] = load i16, ptr [[ARRAYIDX10_9]], align 2
 ; CHECK-NEXT:    [[CONV11_9:%.*]] = zext i16 [[TMP68]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_9]], label [[IF_THEN_9:%.*]], label [[IF_ELSE_9:%.*]]
@@ -347,14 +347,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_9:%.*]] = phi i16 [ [[CONV28_9]], [[IF_ELSE_9]] ], [ [[CONV12_9]], [[IF_THEN_9]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_9]], ptr [[ARRAYIDX_9]], align 2
 ; CHECK-NEXT:    [[OR_945:%.*]] = or i16 [[OR_844]], [[STOREMERGE_9]]
-; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 20
+; CHECK-NEXT:    [[ARRAYIDX_10:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 20
 ; CHECK-NEXT:    [[TMP70:%.*]] = load i16, ptr [[ARRAYIDX_10]], align 2
 ; CHECK-NEXT:    [[CONV_10:%.*]] = sext i16 [[TMP70]] to i32
 ; CHECK-NEXT:    [[CMP1_10:%.*]] = icmp sgt i16 [[TMP70]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_10:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 20
+; CHECK-NEXT:    [[ARRAYIDX4_10:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 20
 ; CHECK-NEXT:    [[TMP71:%.*]] = load i16, ptr [[ARRAYIDX4_10]], align 2
 ; CHECK-NEXT:    [[CONV5_10:%.*]] = zext i16 [[TMP71]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_10:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 20
+; CHECK-NEXT:    [[ARRAYIDX10_10:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 20
 ; CHECK-NEXT:    [[TMP72:%.*]] = load i16, ptr [[ARRAYIDX10_10]], align 2
 ; CHECK-NEXT:    [[CONV11_10:%.*]] = zext i16 [[TMP72]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_10]], label [[IF_THEN_10:%.*]], label [[IF_ELSE_10:%.*]]
@@ -375,14 +375,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_10:%.*]] = phi i16 [ [[CONV28_10]], [[IF_ELSE_10]] ], [ [[CONV12_10]], [[IF_THEN_10]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_10]], ptr [[ARRAYIDX_10]], align 2
 ; CHECK-NEXT:    [[OR_1046:%.*]] = or i16 [[OR_945]], [[STOREMERGE_10]]
-; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 22
+; CHECK-NEXT:    [[ARRAYIDX_11:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 22
 ; CHECK-NEXT:    [[TMP74:%.*]] = load i16, ptr [[ARRAYIDX_11]], align 2
 ; CHECK-NEXT:    [[CONV_11:%.*]] = sext i16 [[TMP74]] to i32
 ; CHECK-NEXT:    [[CMP1_11:%.*]] = icmp sgt i16 [[TMP74]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_11:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 22
+; CHECK-NEXT:    [[ARRAYIDX4_11:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 22
 ; CHECK-NEXT:    [[TMP75:%.*]] = load i16, ptr [[ARRAYIDX4_11]], align 2
 ; CHECK-NEXT:    [[CONV5_11:%.*]] = zext i16 [[TMP75]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_11:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 22
+; CHECK-NEXT:    [[ARRAYIDX10_11:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 22
 ; CHECK-NEXT:    [[TMP76:%.*]] = load i16, ptr [[ARRAYIDX10_11]], align 2
 ; CHECK-NEXT:    [[CONV11_11:%.*]] = zext i16 [[TMP76]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_11]], label [[IF_THEN_11:%.*]], label [[IF_ELSE_11:%.*]]
@@ -403,14 +403,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_11:%.*]] = phi i16 [ [[CONV28_11]], [[IF_ELSE_11]] ], [ [[CONV12_11]], [[IF_THEN_11]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_11]], ptr [[ARRAYIDX_11]], align 2
 ; CHECK-NEXT:    [[OR_1147:%.*]] = or i16 [[OR_1046]], [[STOREMERGE_11]]
-; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 24
+; CHECK-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 24
 ; CHECK-NEXT:    [[TMP78:%.*]] = load i16, ptr [[ARRAYIDX_12]], align 2
 ; CHECK-NEXT:    [[CONV_12:%.*]] = sext i16 [[TMP78]] to i32
 ; CHECK-NEXT:    [[CMP1_12:%.*]] = icmp sgt i16 [[TMP78]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_12:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 24
+; CHECK-NEXT:    [[ARRAYIDX4_12:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 24
 ; CHECK-NEXT:    [[TMP79:%.*]] = load i16, ptr [[ARRAYIDX4_12]], align 2
 ; CHECK-NEXT:    [[CONV5_12:%.*]] = zext i16 [[TMP79]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_12:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 24
+; CHECK-NEXT:    [[ARRAYIDX10_12:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 24
 ; CHECK-NEXT:    [[TMP80:%.*]] = load i16, ptr [[ARRAYIDX10_12]], align 2
 ; CHECK-NEXT:    [[CONV11_12:%.*]] = zext i16 [[TMP80]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_12]], label [[IF_THEN_12:%.*]], label [[IF_ELSE_12:%.*]]
@@ -431,14 +431,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_12:%.*]] = phi i16 [ [[CONV28_12]], [[IF_ELSE_12]] ], [ [[CONV12_12]], [[IF_THEN_12]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_12]], ptr [[ARRAYIDX_12]], align 2
 ; CHECK-NEXT:    [[OR_1248:%.*]] = or i16 [[OR_1147]], [[STOREMERGE_12]]
-; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 26
+; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 26
 ; CHECK-NEXT:    [[TMP82:%.*]] = load i16, ptr [[ARRAYIDX_13]], align 2
 ; CHECK-NEXT:    [[CONV_13:%.*]] = sext i16 [[TMP82]] to i32
 ; CHECK-NEXT:    [[CMP1_13:%.*]] = icmp sgt i16 [[TMP82]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_13:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 26
+; CHECK-NEXT:    [[ARRAYIDX4_13:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 26
 ; CHECK-NEXT:    [[TMP83:%.*]] = load i16, ptr [[ARRAYIDX4_13]], align 2
 ; CHECK-NEXT:    [[CONV5_13:%.*]] = zext i16 [[TMP83]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_13:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 26
+; CHECK-NEXT:    [[ARRAYIDX10_13:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 26
 ; CHECK-NEXT:    [[TMP84:%.*]] = load i16, ptr [[ARRAYIDX10_13]], align 2
 ; CHECK-NEXT:    [[CONV11_13:%.*]] = zext i16 [[TMP84]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_13]], label [[IF_THEN_13:%.*]], label [[IF_ELSE_13:%.*]]
@@ -459,14 +459,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_13:%.*]] = phi i16 [ [[CONV28_13]], [[IF_ELSE_13]] ], [ [[CONV12_13]], [[IF_THEN_13]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_13]], ptr [[ARRAYIDX_13]], align 2
 ; CHECK-NEXT:    [[OR_1349:%.*]] = or i16 [[OR_1248]], [[STOREMERGE_13]]
-; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 28
+; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 28
 ; CHECK-NEXT:    [[TMP86:%.*]] = load i16, ptr [[ARRAYIDX_14]], align 2
 ; CHECK-NEXT:    [[CONV_14:%.*]] = sext i16 [[TMP86]] to i32
 ; CHECK-NEXT:    [[CMP1_14:%.*]] = icmp sgt i16 [[TMP86]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_14:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 28
+; CHECK-NEXT:    [[ARRAYIDX4_14:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 28
 ; CHECK-NEXT:    [[TMP87:%.*]] = load i16, ptr [[ARRAYIDX4_14]], align 2
 ; CHECK-NEXT:    [[CONV5_14:%.*]] = zext i16 [[TMP87]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_14:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 28
+; CHECK-NEXT:    [[ARRAYIDX10_14:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 28
 ; CHECK-NEXT:    [[TMP88:%.*]] = load i16, ptr [[ARRAYIDX10_14]], align 2
 ; CHECK-NEXT:    [[CONV11_14:%.*]] = zext i16 [[TMP88]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_14]], label [[IF_THEN_14:%.*]], label [[IF_ELSE_14:%.*]]
@@ -487,14 +487,14 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[STOREMERGE_14:%.*]] = phi i16 [ [[CONV28_14]], [[IF_ELSE_14]] ], [ [[CONV12_14]], [[IF_THEN_14]] ]
 ; CHECK-NEXT:    store i16 [[STOREMERGE_14]], ptr [[ARRAYIDX_14]], align 2
 ; CHECK-NEXT:    [[OR_1450:%.*]] = or i16 [[OR_1349]], [[STOREMERGE_14]]
-; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, ptr [[DCT]], i64 30
+; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds nuw i8, ptr [[DCT]], i64 30
 ; CHECK-NEXT:    [[TMP90:%.*]] = load i16, ptr [[ARRAYIDX_15]], align 2
 ; CHECK-NEXT:    [[CONV_15:%.*]] = sext i16 [[TMP90]] to i32
 ; CHECK-NEXT:    [[CMP1_15:%.*]] = icmp sgt i16 [[TMP90]], 0
-; CHECK-NEXT:    [[ARRAYIDX4_15:%.*]] = getelementptr inbounds i8, ptr [[BIAS]], i64 30
+; CHECK-NEXT:    [[ARRAYIDX4_15:%.*]] = getelementptr inbounds nuw i8, ptr [[BIAS]], i64 30
 ; CHECK-NEXT:    [[TMP91:%.*]] = load i16, ptr [[ARRAYIDX4_15]], align 2
 ; CHECK-NEXT:    [[CONV5_15:%.*]] = zext i16 [[TMP91]] to i32
-; CHECK-NEXT:    [[ARRAYIDX10_15:%.*]] = getelementptr inbounds i8, ptr [[MF]], i64 30
+; CHECK-NEXT:    [[ARRAYIDX10_15:%.*]] = getelementptr inbounds nuw i8, ptr [[MF]], i64 30
 ; CHECK-NEXT:    [[TMP92:%.*]] = load i16, ptr [[ARRAYIDX10_15]], align 2
 ; CHECK-NEXT:    [[CONV11_15:%.*]] = zext i16 [[TMP92]] to i32
 ; CHECK-NEXT:    br i1 [[CMP1_15]], label [[IF_THEN_15:%.*]], label [[IF_ELSE_15:%.*]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
index 885529926fc4b9..e4e2115a580c42 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/sinking-vs-if-conversion.ll
@@ -12,27 +12,27 @@ define void @test_find_min(ptr noundef nonnull align 8 dereferenceable(24) %this
 ; CHECK-LABEL: define void @test_find_min(
 ; CHECK-SAME: ptr nocapture noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[NUM_:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 16
+; CHECK-NEXT:    [[NUM_:%.*]] = getelementptr inbounds nuw i8, ptr [[THIS]], i64 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[NUM_]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[THIS]], align 8
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    [[ARRAY_:%.*]] = getelementptr inbounds i8, ptr [[THIS]], i64 8
+; CHECK-NEXT:    [[ARRAY_:%.*]] = getelementptr inbounds nuw i8, ptr [[THIS]], i64 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAY_]], align 8
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[TMP0]] to i64
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END7:%.*]] ]
 ; CHECK-NEXT:    [[MIN_010:%.*]] = phi ptr [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ [[COND8:%.*]], [[COND_END7]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr, ptr [[TMP2]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq ptr [[MIN_010]], null
 ; CHECK-NEXT:    br i1 [[CMP3]], label [[COND_END7]], label [[COND_FALSE:%.*]]
 ; CHECK:       cond.false:
 ; CHECK-NEXT:    [[KEY2:%.*]] = getelementptr inbounds i8, ptr [[MIN_010]], i64 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[KEY2]], align 4
-; CHECK-NEXT:    [[KEY:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 4
+; CHECK-NEXT:    [[KEY:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[KEY]], align 4
 ; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], ptr [[TMP3]], ptr [[MIN_010]]
@@ -143,17 +143,17 @@ define void @cond_select_loop(ptr noalias nocapture noundef readonly %a, ptr noa
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_07:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I_07]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[I_07]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_END]], label [[COND_FALSE:%.*]]
 ; CHECK:       cond.false:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I_07]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[I_07]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    br label [[COND_END]]
 ; CHECK:       cond.end:
 ; CHECK-NEXT:    [[COND:%.*]] = phi float [ [[TMP1]], [[COND_FALSE]] ], [ [[TMP0]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I_07]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[I_07]]
 ; CHECK-NEXT:    store float [[COND]], ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_07]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1000

diff  --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
index 8d31a7d25ff3da..1cf20cb1fd64d3 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
@@ -16,20 +16,20 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[IP1]] to i64
 ; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[IP2]] to i64
-; CHECK-NEXT:    [[RRRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 4
-; CHECK-NEXT:    [[RRRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 4
 ; CHECK-NEXT:    [[RDD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[RDD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[RRRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR]], i64 4
-; CHECK-NEXT:    [[RRRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64]], i64 4
 ; CHECK-NEXT:    [[RDD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[RDD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[RRRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR_1]], i64 4
-; CHECK-NEXT:    [[RRRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64_1]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX3_2:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_1]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_1]], i64 4
 ; CHECK-NEXT:    [[RDD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR_1]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[RDD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64_1]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[RRRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR_2]], i64 4
-; CHECK-NEXT:    [[RRRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[RDD_PTR64_2]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR_2]], i64 4
+; CHECK-NEXT:    [[RRRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[RDD_PTR64_2]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[RRRAYIDX3]], align 1, !tbaa [[TBAA0]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
index ed25734c8448fb..ad869268f7e654 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll
@@ -13,13 +13,13 @@ define void @test_known_trip_count() {
 ; CHECK-LABEL: @test_known_trip_count(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr @b, align 16
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr getelementptr inbounds (i8, ptr @b, i64 16), align 16
+; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <2 x double>, ptr getelementptr inbounds nuw (i8, ptr @b, i64 16), align 16
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr @c, align 16
-; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr getelementptr inbounds (i8, ptr @c, i64 16), align 16
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr getelementptr inbounds nuw (i8, ptr @c, i64 16), align 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD4]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[WIDE_LOAD3]], [[WIDE_LOAD5]]
 ; CHECK-NEXT:    store <2 x double> [[TMP0]], ptr @a, align 16
-; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr getelementptr inbounds (i8, ptr @a, i64 16), align 16
+; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 16), align 16
 ; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr getelementptr inbounds (i8, ptr @b, i64 32), align 16
 ; CHECK-NEXT:    [[WIDE_LOAD3_1:%.*]] = load <2 x double>, ptr getelementptr inbounds (i8, ptr @b, i64 48), align 16
 ; CHECK-NEXT:    [[WIDE_LOAD4_1:%.*]] = load <2 x double>, ptr getelementptr inbounds (i8, ptr @c, i64 32), align 16
@@ -132,10 +132,10 @@ define void @test_known_trip_count() {
 ; CHECK-NEXT:    [[TMP29:%.*]] = fadd <2 x double> [[WIDE_LOAD3_14]], [[WIDE_LOAD5_14]]
 ; CHECK-NEXT:    store <2 x double> [[TMP28]], ptr getelementptr inbounds (i8, ptr @a, i64 448), align 16
 ; CHECK-NEXT:    store <2 x double> [[TMP29]], ptr getelementptr inbounds (i8, ptr @a, i64 464), align 16
-; CHECK-NEXT:    [[TMP30:%.*]] = load double, ptr getelementptr inbounds (i8, ptr @b, i64 480), align 16
-; CHECK-NEXT:    [[TMP31:%.*]] = load double, ptr getelementptr inbounds (i8, ptr @c, i64 480), align 16
+; CHECK-NEXT:    [[TMP30:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @b, i64 480), align 16
+; CHECK-NEXT:    [[TMP31:%.*]] = load double, ptr getelementptr inbounds nuw (i8, ptr @c, i64 480), align 16
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP30]], [[TMP31]]
-; CHECK-NEXT:    store double [[ADD]], ptr getelementptr inbounds (i8, ptr @a, i64 480), align 16
+; CHECK-NEXT:    store double [[ADD]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 480), align 16
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -180,17 +180,17 @@ define void @test_runtime_trip_count(i32 %N) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 16
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x double>, ptr [[TMP1]], align 16
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP2]], align 16
 ; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], [[WIDE_LOAD5]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD4]], [[WIDE_LOAD6]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16
 ; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TMP6]], align 16
 ; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -204,12 +204,12 @@ define void @test_runtime_trip_count(i32 %N) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER7]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [58 x double], ptr @b, i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [58 x double], ptr @c, i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP9]], [[TMP10]]
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [58 x double], ptr @a, i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store double [[ADD]], ptr [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
index 0db94bea46c402..63e85731e8c700 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
@@ -59,7 +59,7 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
 ; O2:       vector.body:
 ; O2-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_BODY4_PREHEADER]] ]
 ; O2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
-; O2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; O2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; O2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; O2-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
 ; O2-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
@@ -104,7 +104,7 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
 ; O3:       vector.body:
 ; O3-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ]
 ; O3-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[INDEX]]
-; O3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; O3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; O3-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; O3-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
 ; O3-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
index d794106ede3b25..285e390f1b61d4 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
@@ -12,7 +12,7 @@ define i32 @f(i32 noundef %x) {
 ; CHECK-NEXT:    br i1 [[TMP0]], label %[[SWITCH_LOOKUP:.*]], label %[[SW_EPILOG:.*]]
 ; CHECK:       [[SWITCH_LOOKUP]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[X]] to i64
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds nuw [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
 ; CHECK-NEXT:    br label %[[SW_EPILOG]]
 ; CHECK:       [[SW_EPILOG]]:

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
index 7109c0ff4d55b1..aa4884748ef8c7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
@@ -15,7 +15,7 @@ define range(i32 0, 2) i32 @f(i32 noundef %x) local_unnamed_addr {
 ; CHECK-NEXT:    br i1 [[TMP0]], label %[[SWITCH_LOOKUP:.*]], label %[[SW_EPILOG:.*]]
 ; CHECK:       [[SWITCH_LOOKUP]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[X]] to i64
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds nuw [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
 ; CHECK-NEXT:    br label %[[SW_EPILOG]]
 ; CHECK:       [[SW_EPILOG]]:

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
index 580c13d50b1f51..8994afcdc629d0 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
@@ -33,7 +33,7 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[PIN:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
@@ -43,7 +43,7 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = or disjoint <4 x i32> [[TMP4]], splat (i32 -16777216)
 ; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint <4 x i32> [[TMP5]], splat (i32 -16777216)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[POUT:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -57,12 +57,12 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER5]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[PIN]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[PIN]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP11]] to i32
 ; CHECK-NEXT:    [[OR2:%.*]] = mul nuw nsw i32 [[CONV]], 65793
 ; CHECK-NEXT:    [[OR3:%.*]] = or disjoint i32 [[OR2]], -16777216
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[POUT]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[POUT]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store i32 [[OR3]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll
index 549e4e036a747b..8fbf45945cf3ef 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll
@@ -10,8 +10,8 @@ define void @trunc_through_one_add(ptr noalias %0, ptr noalias readonly %1) {
 ; SSE-NEXT:    [[TMP6:%.*]] = add nuw nsw <8 x i16> [[TMP5]], [[TMP4]]
 ; SSE-NEXT:    [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], splat (i16 2)
 ; SSE-NEXT:    store <8 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2
-; SSE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; SSE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i8>, ptr [[TMP8]], align 1
 ; SSE-NEXT:    [[TMP11:%.*]] = zext <8 x i8> [[TMP10]] to <8 x i16>
 ; SSE-NEXT:    [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], splat (i16 1)
@@ -185,9 +185,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr
 ; SSE-NEXT:    [[TMP10:%.*]] = add nuw nsw <8 x i16> [[TMP9]], [[TMP8]]
 ; SSE-NEXT:    [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], splat (i16 2)
 ; SSE-NEXT:    store <8 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2
-; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
-; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8
+; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP15:%.*]] = load <8 x i8>, ptr [[TMP12]], align 1
 ; SSE-NEXT:    [[TMP16:%.*]] = zext <8 x i8> [[TMP15]] to <8 x i16>
 ; SSE-NEXT:    [[TMP17:%.*]] = load <8 x i8>, ptr [[TMP13]], align 1

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
index 076cc3f4dddd15..d79ffb0149ff87 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
@@ -14,7 +14,7 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
 ; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
 ; CHECK:       [[FOR_BODY_PREHEADER]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
-; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[TMP0]]
+; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[FOR_BODY_PREHEADER14]]:
@@ -48,10 +48,10 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
 ; CHECK-NEXT:    ret void
 ; CHECK:       [[FOR_BODY]]:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
-; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[GEP_3]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
 ; CHECK-NEXT:    [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
 ; CHECK-NEXT:    [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll b/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll
index 10193df6f179ab..08ae28ce782c20 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/simplifycfg-late.ll
@@ -14,7 +14,7 @@ define i32 @f(i32 %c) {
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[SWITCH_TABLEIDX]] to i64
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.f, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds nuw [7 x i32], ptr @switch.table.f, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
 ; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       return:

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
index 92ce8eb5b5ac23..ec387d6ae44f28 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
@@ -40,7 +40,7 @@ define void @licm(ptr align 8 dereferenceable(8) %_M_start.i, i64 %numElem) {
 ; O23:       vector.body:
 ; O23-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; O23-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[INDEX]]
-; O23-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; O23-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; O23-NEXT:    store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; O23-NEXT:    store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[TBAA8]]
 ; O23-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
index e645a309b47930..a715736df887fa 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
@@ -16,10 +16,10 @@ define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(ptr %FVP) {
 ; O1-LABEL: define {{[^@]+}}@_Z13vecIncFromPtrP12FloatVecPair
 ; O1-SAME: (ptr nocapture readonly [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; O1-NEXT:  entry:
-; O1-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds i8, ptr [[FVP]], i64 16
+; O1-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds nuw i8, ptr [[FVP]], i64 16
 ; O1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
 ; O1-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
-; O1-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_I_I]], i64 8
+; O1-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_I_I]], i64 8
 ; O1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
 ; O1-NEXT:    [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; O1-NEXT:    br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
@@ -46,10 +46,10 @@ define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(ptr %FVP) {
 ; O23-LABEL: define {{[^@]+}}@_Z13vecIncFromPtrP12FloatVecPair
 ; O23-SAME: (ptr nocapture readonly [[FVP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; O23-NEXT:  entry:
-; O23-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds i8, ptr [[FVP]], i64 16
+; O23-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds nuw i8, ptr [[FVP]], i64 16
 ; O23-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
 ; O23-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
-; O23-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX_I_I]], i64 8
+; O23-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX_I_I]], i64 8
 ; O23-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
 ; O23-NEXT:    [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; O23-NEXT:    br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index 2ba2935e07e2f4..7349f603822cd7 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -40,9 +40,9 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 32
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 64
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 96
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 64
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 96
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[TBAA3]]
@@ -52,9 +52,9 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 32
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 96
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 32
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 64
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 96
 ; CHECK-NEXT:    store <4 x double> [[TMP9]], ptr [[TMP13]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[TBAA3]]
@@ -101,10 +101,10 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
 ; CHECK:       for.body.prol:
 ; CHECK-NEXT:    [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
 ; CHECK-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_PROL]]
+; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_PROL]]
 ; CHECK-NEXT:    [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul fast double [[T0_PROL]], [[TMP18]]
-; CHECK-NEXT:    [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_PROL]]
+; CHECK-NEXT:    [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_PROL]]
 ; CHECK-NEXT:    store double [[TMP19]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
 ; CHECK-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
@@ -127,52 +127,52 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = fmul fast double [[T0]], [[TMP22]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store double [[TMP30]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = fmul fast double [[T0_1]], [[TMP23]]
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    store double [[TMP31]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = fmul fast double [[T0_2]], [[TMP24]]
-; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]]
 ; CHECK-NEXT:    store double [[TMP32]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = fmul fast double [[T0_3]], [[TMP25]]
-; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]]
 ; CHECK-NEXT:    store double [[TMP33]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]]
 ; CHECK-NEXT:    [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP34:%.*]] = fmul fast double [[T0_4]], [[TMP26]]
-; CHECK-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]]
 ; CHECK-NEXT:    store double [[TMP34]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]]
 ; CHECK-NEXT:    [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = fmul fast double [[T0_5]], [[TMP27]]
-; CHECK-NEXT:    [[ARRAYIDX2_5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[ARRAYIDX2_5:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]]
 ; CHECK-NEXT:    store double [[TMP35]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]]
 ; CHECK-NEXT:    [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = fmul fast double [[T0_6]], [[TMP28]]
-; CHECK-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]]
 ; CHECK-NEXT:    store double [[TMP36]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
-; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]]
 ; CHECK-NEXT:    [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = fmul fast double [[T0_7]], [[TMP29]]
-; CHECK-NEXT:    [[ARRAYIDX2_7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]]
+; CHECK-NEXT:    [[ARRAYIDX2_7:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]]
 ; CHECK-NEXT:    store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
 ; CHECK-NEXT:    [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
index 2296531bd1de60..85f6fceb5bdbed 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
@@ -48,7 +48,7 @@ define noundef <4 x float> @ConvertVectors_ByVal(ptr noundef nonnull align 16 de
 ; SSE-LABEL: @ConvertVectors_ByVal(
 ; SSE-NEXT:  entry:
 ; SSE-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V:%.*]], align 16
-; SSE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 8
+; SSE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
 ; SSE-NEXT:    [[V_VAL421:%.*]] = load i64, ptr [[TMP1]], align 8
 ; SSE-NEXT:    [[TMP2:%.*]] = trunc i64 [[V_VAL421]] to i32
 ; SSE-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
@@ -59,7 +59,7 @@ define noundef <4 x float> @ConvertVectors_ByVal(ptr noundef nonnull align 16 de
 ; AVX-LABEL: @ConvertVectors_ByVal(
 ; AVX-NEXT:  entry:
 ; AVX-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V:%.*]], align 16
-; AVX-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[V]], i64 8
+; AVX-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
 ; AVX-NEXT:    [[V_VAL421:%.*]] = load i64, ptr [[TMP1]], align 8
 ; AVX-NEXT:    [[TMP2:%.*]] = trunc i64 [[V_VAL421]] to i32
 ; AVX-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
index 428147a1383b11..f98afb3b5243ed 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
@@ -37,17 +37,17 @@ define void @bar(ptr noundef %0) {
 ; SSE-NEXT:    [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], splat (i64 -1)
 ; SSE-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 44)
 ; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
 ; SSE-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], splat (i64 -1)
 ; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 44)
 ; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 32
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
 ; SSE-NEXT:    [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], splat (i64 -1)
 ; SSE-NEXT:    [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], splat (i64 44)
 ; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8
-; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
+; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 48
 ; SSE-NEXT:    [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
 ; SSE-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], splat (i64 -1)
 ; SSE-NEXT:    [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], splat (i64 44)
@@ -59,7 +59,7 @@ define void @bar(ptr noundef %0) {
 ; AVX-NEXT:    [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], splat (i64 -1)
 ; AVX-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 44)
 ; AVX-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 32
 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
 ; AVX-NEXT:    [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], splat (i64 -1)
 ; AVX-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], splat (i64 44)

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll
index e9a42a859ab3df..e2c85bb5153698 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reduction-known-first-value.ll
@@ -17,7 +17,7 @@ define i16 @test(ptr %ptr) {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x i16> [ zeroinitializer, [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16>

diff  --git a/llvm/test/Transforms/PhaseOrdering/basic.ll b/llvm/test/Transforms/PhaseOrdering/basic.ll
index 343afedff1f852..5a8e5c4af7e248 100644
--- a/llvm/test/Transforms/PhaseOrdering/basic.ll
+++ b/llvm/test/Transforms/PhaseOrdering/basic.ll
@@ -35,7 +35,7 @@ define i32 @test2(i32 %a, ptr %p) nounwind uwtable ssp {
 ; CHECK-NEXT:    [[DIV1:%.*]] = lshr i32 [[A:%.*]], 2
 ; CHECK-NEXT:    store i32 [[DIV1]], ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = shl nuw nsw i32 [[DIV1]], 1
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
 ; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    ret i32 0
 ;

diff  --git a/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
index 4c9cd3090681eb..bbd4849c32296e 100644
--- a/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
+++ b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
@@ -14,7 +14,7 @@ define ptr @parent(ptr align 8 dereferenceable(72) %f, half %val1, i16 %val2, i3
 ; CHECK-LABEL: define noundef nonnull ptr @parent
 ; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], half [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[F]], i64 64
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[F]], i64 64
 ; CHECK-NEXT:    [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[VAL1]] to i16
@@ -22,7 +22,7 @@ define ptr @parent(ptr align 8 dereferenceable(72) %f, half %val1, i16 %val2, i3
 ; CHECK-NEXT:    [[VAL2_SINK_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[TMP1]], i16 [[VAL2]]
 ; CHECK-NEXT:    [[VAL1_SINK_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], half [[TMP2]], half [[VAL1]]
 ; CHECK-NEXT:    store i16 [[VAL2_SINK_I]], ptr [[F_VAL]], align 2
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[F_VAL]], i64 16
 ; CHECK-NEXT:    store half [[VAL1_SINK_I]], ptr [[TMP3]], align 2
 ; CHECK-NEXT:    ret ptr [[F]]
 ;

diff  --git a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll
index a3d111fffbbf6d..ee7698b116aa23 100644
--- a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll
+++ b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll
@@ -16,13 +16,13 @@ define ptr @parent(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32
 ; CHECK-LABEL: define noundef nonnull ptr @parent
 ; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], i16 [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[F]], i64 64
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[F]], i64 64
 ; CHECK-NEXT:    [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0
 ; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL1]], i16 [[VAL2]]
 ; CHECK-NEXT:    [[SPEC_SELECT2_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL2]], i16 [[VAL1]]
 ; CHECK-NEXT:    store i16 [[SPEC_SELECT_I]], ptr [[F_VAL]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[F_VAL]], i64 16
 ; CHECK-NEXT:    store i16 [[SPEC_SELECT2_I]], ptr [[TMP1]], align 2
 ; CHECK-NEXT:    ret ptr [[F]]
 ;

diff  --git a/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll b/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll
index 862f40a9ae2e98..e156111bd445e6 100644
--- a/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll
+++ b/llvm/test/Transforms/PhaseOrdering/gvn-replacement-vs-hoist.ll
@@ -7,7 +7,7 @@ define void @test(ptr noundef %a, i32 noundef %beam) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[BEAM]], 1
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[MUL]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
@@ -21,7 +21,7 @@ define void @test(ptr noundef %a, i32 noundef %beam) {
 ; CHECK:       if.else:
 ; CHECK-NEXT:    [[MUL2:%.*]] = shl nuw nsw i32 [[I_06]], 1
 ; CHECK-NEXT:    [[IDXPROM3:%.*]] = zext nneg i32 [[MUL2]] to i64
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM3]]
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IDXPROM3]]
 ; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:

diff  --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
index bd509509c321f8..26920b053e7e72 100644
--- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
@@ -133,7 +133,7 @@ define void @foo(ptr noundef nonnull align 8 dereferenceable(24) noalias %vec) #
 ; CHECK-LABEL: define void @foo
 ; CHECK-SAME: (ptr noalias nocapture noundef nonnull readonly align 8 dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[_M_FINISH_I_I:%.*]] = getelementptr inbounds i8, ptr [[VEC]], i64 8
+; CHECK-NEXT:    [[_M_FINISH_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[TBAA5:![0-9]+]]
 ; CHECK-NEXT:    [[SUB_PTR_LHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
@@ -272,7 +272,7 @@ define void @loop_with_signed_induction(ptr noundef nonnull align 8 dereferencea
 ; CHECK-LABEL: define void @loop_with_signed_induction
 ; CHECK-SAME: (ptr nocapture noundef nonnull readonly align 8 dereferenceable(24) [[VEC:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[_M_FINISH_I_I:%.*]] = getelementptr inbounds i8, ptr [[VEC]], i64 8
+; CHECK-NEXT:    [[_M_FINISH_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[_M_FINISH_I_I]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VEC]], align 8, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    [[SUB_PTR_LHS_CAST_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
@@ -285,7 +285,7 @@ define void @loop_with_signed_induction(ptr noundef nonnull align 8 dereferencea
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_010:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 [[I_010]]
+; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i64 [[I_010]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00
 ; CHECK-NEXT:    store double [[ADD]], ptr [[ADD_PTR_I]], align 8, !tbaa [[TBAA6]]
@@ -359,7 +359,7 @@ define void @monkey(ptr noundef %arr, i32 noundef %len) {
 ; CHECK:       for.body4:
 ; CHECK-NEXT:    [[K_07:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY4]] ], [ [[I_09]], [[FOR_BODY4_PREHEADER]] ]
 ; CHECK-NEXT:    [[IDX_EXT_I:%.*]] = zext i32 [[K_07]] to i64
-; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[IDX_EXT_I]]
+; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds nuw i32, ptr [[ARR]], i64 [[IDX_EXT_I]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
 ; CHECK-NEXT:    store i32 [[ADD]], ptr [[ADD_PTR_I]], align 4

diff  --git a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
index 72921acba5969f..5435bc2eb8b376 100644
--- a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
+++ b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
@@ -28,7 +28,7 @@ define internal void @child(ptr %p, ptr %n, ptr %c) noinline {
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[FOR_END:.*]], label %[[FOR_INC]]
 ; CHECK:       [[FOR_INC]]:
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TMP0]], 5
 ; CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX]], align 4

diff  --git a/llvm/test/Transforms/PhaseOrdering/lto-licm.ll b/llvm/test/Transforms/PhaseOrdering/lto-licm.ll
index 763e266e6a3829..8b51f9f4655879 100644
--- a/llvm/test/Transforms/PhaseOrdering/lto-licm.ll
+++ b/llvm/test/Transforms/PhaseOrdering/lto-licm.ll
@@ -12,7 +12,7 @@ define void @hoist_fdiv(ptr %a, float %b) {
 ; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    store float [[TMP2]], ptr [[ARRAYIDX]], align 4

diff  --git a/llvm/test/Transforms/PhaseOrdering/pr39282.ll b/llvm/test/Transforms/PhaseOrdering/pr39282.ll
index 2bbc01b677bb69..583f1cbc6c5a10 100644
--- a/llvm/test/Transforms/PhaseOrdering/pr39282.ll
+++ b/llvm/test/Transforms/PhaseOrdering/pr39282.ll
@@ -19,22 +19,22 @@ define void @pr39282(ptr %addr1, ptr %addr2) {
 ; CHECK-NEXT:  start:
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]])
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
-; CHECK-NEXT:    [[X_I:%.*]] = load i32, ptr [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0
-; CHECK-NEXT:    store i32 [[X_I]], ptr [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT:    [[ADDR1I_1:%.*]] = getelementptr inbounds i8, ptr [[ADDR1]], i64 4
-; CHECK-NEXT:    [[ADDR2I_1:%.*]] = getelementptr inbounds i8, ptr [[ADDR2]], i64 4
+; CHECK-NEXT:    [[X_I:%.*]] = load i32, ptr [[ADDR1:%.*]], align 4, !alias.scope [[META3]], !noalias [[META0]]
+; CHECK-NEXT:    store i32 [[X_I]], ptr [[ADDR2:%.*]], align 4, !alias.scope [[META0]], !noalias [[META3]]
+; CHECK-NEXT:    [[ADDR1I_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADDR1]], i64 4
+; CHECK-NEXT:    [[ADDR2I_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADDR2]], i64 4
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]])
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]])
-; CHECK-NEXT:    [[X_I_1:%.*]] = load i32, ptr [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5
-; CHECK-NEXT:    store i32 [[X_I_1]], ptr [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7
+; CHECK-NEXT:    [[X_I_1:%.*]] = load i32, ptr [[ADDR1I_1]], align 4, !alias.scope [[META7]], !noalias [[META5]]
+; CHECK-NEXT:    store i32 [[X_I_1]], ptr [[ADDR2I_1]], align 4, !alias.scope [[META5]], !noalias [[META7]]
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]])
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]])
-; CHECK-NEXT:    [[X_I_2:%.*]] = load i32, ptr [[ADDR1]], align 4, !alias.scope !11, !noalias !9
-; CHECK-NEXT:    store i32 [[X_I_2]], ptr [[ADDR2]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT:    [[X_I_2:%.*]] = load i32, ptr [[ADDR1]], align 4, !alias.scope [[META11]], !noalias [[META9]]
+; CHECK-NEXT:    store i32 [[X_I_2]], ptr [[ADDR2]], align 4, !alias.scope [[META9]], !noalias [[META11]]
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
 ; CHECK-NEXT:    tail call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
-; CHECK-NEXT:    [[X_I_3:%.*]] = load i32, ptr [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13
-; CHECK-NEXT:    store i32 [[X_I_3]], ptr [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15
+; CHECK-NEXT:    [[X_I_3:%.*]] = load i32, ptr [[ADDR1I_1]], align 4, !alias.scope [[META15]], !noalias [[META13]]
+; CHECK-NEXT:    store i32 [[X_I_3]], ptr [[ADDR2I_1]], align 4, !alias.scope [[META13]], !noalias [[META15]]
 ; CHECK-NEXT:    ret void
 ;
 start:

diff  --git a/llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll b/llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll
index 17073fa1982025..bad07c19f14b3e 100644
--- a/llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/pr98799-inline-simplifycfg-ub.ll
@@ -37,7 +37,7 @@ define i32 @foo(ptr %arg, i1 %arg1) {
 ; O2-NEXT:    [[TMP0:%.*]] = xor i1 [[ARG1]], true
 ; O2-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
 ; O2-NEXT:    [[I_I:%.*]] = load ptr, ptr [[ARG]], align 8, !nonnull [[META0:![0-9]+]], !noundef [[META0]]
-; O2-NEXT:    [[I3_I:%.*]] = getelementptr inbounds i8, ptr [[I_I]], i64 1
+; O2-NEXT:    [[I3_I:%.*]] = getelementptr inbounds nuw i8, ptr [[I_I]], i64 1
 ; O2-NEXT:    store ptr [[I3_I]], ptr [[ARG]], align 8
 ; O2-NEXT:    [[I3:%.*]] = load i32, ptr [[I_I]], align 4
 ; O2-NEXT:    ret i32 [[I3]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
index d5a422ad41f559..af123ad2367faf 100644
--- a/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
+++ b/llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
@@ -21,7 +21,7 @@ define void @test1(i32 %d, ptr %p) nounwind uwtable ssp {
 ; CHECK-NEXT:    --> {%p,+,(8 * (%d /u 4))}<%for.body> U: full-set S: full-set Exits: ((504 * (%d /u 4)) + %p) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %add.ptr = getelementptr inbounds i32, ptr %p.addr.02, i32 %div1
 ; CHECK-NEXT:    --> {((4 * (%d /u 4))<nuw><nsw> + %p),+,(8 * (%d /u 4))}<%for.body> U: full-set S: full-set Exits: ((508 * (%d /u 4)) + %p) LoopDispositions: { %for.body: Computable }
-; CHECK-NEXT:    %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %div1
+; CHECK-NEXT:    %add.ptr1 = getelementptr inbounds nuw i32, ptr %add.ptr, i32 %div1
 ; CHECK-NEXT:    --> {((8 * (%d /u 4)) + %p),+,(8 * (%d /u 4))}<%for.body> U: full-set S: full-set Exits: ((512 * (%d /u 4)) + %p) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %inc = add nuw nsw i32 %i.03, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%for.body> U: [1,65) S: [1,65) Exits: 64 LoopDispositions: { %for.body: Computable }
@@ -68,7 +68,7 @@ define void @test1a(i32 %d, ptr %p) nounwind uwtable ssp {
 ; CHECK-NEXT:    --> {%p,+,(8 * (%d /u 2))}<%for.body> U: full-set S: full-set Exits: ((504 * (%d /u 2)) + %p) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %add.ptr = getelementptr inbounds i32, ptr %p.addr.02, i32 %div1
 ; CHECK-NEXT:    --> {((4 * (%d /u 2))<nuw><nsw> + %p),+,(8 * (%d /u 2))}<%for.body> U: full-set S: full-set Exits: ((508 * (%d /u 2)) + %p) LoopDispositions: { %for.body: Computable }
-; CHECK-NEXT:    %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %div1
+; CHECK-NEXT:    %add.ptr1 = getelementptr inbounds nuw i32, ptr %add.ptr, i32 %div1
 ; CHECK-NEXT:    --> {((8 * (%d /u 2)) + %p),+,(8 * (%d /u 2))}<%for.body> U: full-set S: full-set Exits: ((512 * (%d /u 2)) + %p) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %inc = add nuw nsw i32 %i.03, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%for.body> U: [1,65) S: [1,65) Exits: 64 LoopDispositions: { %for.body: Computable }

diff  --git a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll
index f5807c2105abe1..ba71299d6919e6 100644
--- a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll
+++ b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll
@@ -9,7 +9,7 @@ define i1 @PR33605(i32 %a, i32 %b, ptr %c) {
 ; CHECK-LABEL: @PR33605(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[C:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[C:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[OR]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
index 3975a0dd33748b..cc3cd56616f323 100644
--- a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
@@ -11,13 +11,13 @@ define i16 @helper(i16 %0, i64 %x) {
 ; CHECK-NEXT:  start:
 ; CHECK-NEXT:    [[DATA:%.*]] = alloca [2 x i8], align 2
 ; CHECK-NEXT:    store i16 [[TMP0:%.*]], ptr [[DATA]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 1
 ; CHECK-NEXT:    br label [[BB6_I_I:%.*]]
 ; CHECK:       bb6.i.i:
 ; CHECK-NEXT:    [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
 ; CHECK-NEXT:    [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]]
 ; CHECK-NEXT:    [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
-; CHECK-NEXT:    [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
+; CHECK-NEXT:    [[_34_I_I:%.*]] = getelementptr inbounds nuw [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
 ; CHECK-NEXT:    [[_39_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[_40_I_I]]
 ; CHECK-NEXT:    [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1
 ; CHECK-NEXT:    [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1

diff  --git a/llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll b/llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll
index aba003b13b4593..d2f33f9c3b7547 100644
--- a/llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll
+++ b/llvm/test/Transforms/PhaseOrdering/switch_with_geps.ll
@@ -14,7 +14,7 @@ define i32 @test(ptr %ptr) {
 ; CHECK-LABEL: define i32 @test(
 ; CHECK-SAME: ptr nocapture readonly [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  start:
-; CHECK-NEXT:    [[PHI:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
+; CHECK-NEXT:    [[PHI:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 4
 ; CHECK-NEXT:    [[RET:%.*]] = load i32, ptr [[PHI]], align 4
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
@@ -59,7 +59,7 @@ define void @test2(ptr %self, i64 %v, i64 %ix) {
 ; CHECK-SAME: ptr nocapture writeonly [[SELF:%.*]], i64 [[V:%.*]], i64 [[IX:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  start:
 ; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = shl i64 [[IX]], 3
-; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds i8, ptr [[SELF]], i64 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds nuw i8, ptr [[SELF]], i64 [[SWITCH_TABLEIDX]]
 ; CHECK-NEXT:    store i64 [[V]], ptr [[GEP5]], align 8
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/RewriteStatepointsForGC/intrinsics.ll b/llvm/test/Transforms/RewriteStatepointsForGC/intrinsics.ll
index 43632ccc04efbb..1a45feac262c24 100644
--- a/llvm/test/Transforms/RewriteStatepointsForGC/intrinsics.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/intrinsics.ll
@@ -15,21 +15,21 @@ define ptr addrspace(1) @test_simple(ptr addrspace(1) %obj1, ptr addrspace(1) %o
 ; CHECK-LABEL: define {{[^@]+}}@test_simple
 ; CHECK-SAME: (ptr addrspace(1) [[OBJ1:%.*]], ptr addrspace(1) [[OBJ2:%.*]], i32 [[LEN:%.*]], i1 [[C:%.*]]) gc "statepoint-example" {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[OBJ1_12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OBJ1]], i64 12
-; CHECK-NEXT:    [[OBJ2_16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OBJ2]], i64 16
-; CHECK-NEXT:    [[OBJ_X_BASE1:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ1]], ptr addrspace(1) [[OBJ2]], !is_base_value !0
+; CHECK-NEXT:    [[OBJ1_12:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[OBJ1]], i64 12
+; CHECK-NEXT:    [[OBJ2_16:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[OBJ2]], i64 16
+; CHECK-NEXT:    [[OBJ_X_BASE1:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ1]], ptr addrspace(1) [[OBJ2]], !is_base_value [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[OBJ_X:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ1_12]], ptr addrspace(1) [[OBJ2_16]]
-; CHECK-NEXT:    [[OBJ_Y_BASE:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ2]], ptr addrspace(1) [[OBJ1]], !is_base_value !0
+; CHECK-NEXT:    [[OBJ_Y_BASE:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ2]], ptr addrspace(1) [[OBJ1]], !is_base_value [[META0]]
 ; CHECK-NEXT:    [[OBJ_Y:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ2_16]], ptr addrspace(1) [[OBJ1_12]]
 ; CHECK-NEXT:    [[OBJ_X_BASE1_INT:%.*]] = ptrtoint ptr addrspace(1) [[OBJ_X_BASE1]] to i64
 ; CHECK-NEXT:    [[OBJ_X_INT:%.*]] = ptrtoint ptr addrspace(1) [[OBJ_X]] to i64
 ; CHECK-NEXT:    [[OBJ_X_OFFSET:%.*]] = sub i64 [[OBJ_X_INT]], [[OBJ_X_BASE1_INT]]
 ; CHECK-NEXT:    [[OBJ_Y_BASE_INT:%.*]] = ptrtoint ptr addrspace(1) [[OBJ_Y_BASE]] to i64
-; CHECK-NEXT:    [[OBJ_YA_INT:%.*]] = ptrtoint ptr addrspace(1) [[OBJ_Y]] to i64
-; CHECK-NEXT:    [[OBJ_YA_OFFSET:%.*]] = sub i64 [[OBJ_YA_INT]], [[OBJ_Y_BASE_INT]]
+; CHECK-NEXT:    [[OBJ_Y_INT:%.*]] = ptrtoint ptr addrspace(1) [[OBJ_Y]] to i64
+; CHECK-NEXT:    [[OBJ_YA_OFFSET:%.*]] = sub i64 [[OBJ_Y_INT]], [[OBJ_Y_BASE_INT]]
 ; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr nonnull elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "deopt"(ptr addrspace(1) [[OBJ_X_BASE1]], i64 [[OBJ_X_OFFSET]], ptr addrspace(1) [[OBJ_X_BASE1]], i64 [[OBJ_X_OFFSET]], ptr addrspace(1) [[OBJ_Y_BASE]], i64 [[OBJ_YA_OFFSET]]), "gc-live"(ptr addrspace(1) [[OBJ_Y]], ptr addrspace(1) [[OBJ_Y_BASE]]) ]
-; CHECK-NEXT:    [[OBJ_YA_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
-; CHECK-NEXT:    ret ptr addrspace(1) [[OBJ_YA_RELOCATED]]
+; CHECK-NEXT:    [[OBJ_Y_RELOCATED:%.*]] = call coldcc nonnull ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
+; CHECK-NEXT:    ret ptr addrspace(1) [[OBJ_Y_RELOCATED]]
 ;
 entry:
   %obj1.12 = getelementptr inbounds i8, ptr addrspace(1) %obj1, i64 12
@@ -68,10 +68,10 @@ define ptr addrspace(1) @test_chained(ptr addrspace(1) %obj1, ptr addrspace(1) %
 ; CHECK-LABEL: define {{[^@]+}}@test_chained
 ; CHECK-SAME: (ptr addrspace(1) [[OBJ1:%.*]], ptr addrspace(1) [[OBJ2:%.*]], i32 [[LEN:%.*]], i1 [[C:%.*]]) gc "statepoint-example" {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[OBJ_X_BASE1:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ1]], ptr addrspace(1) [[OBJ2]], !is_base_value !0
-; CHECK-NEXT:    [[OBJ_X_BASE_GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OBJ_X_BASE1]], i64 8
-; CHECK-NEXT:    [[OBJ_X_BASE_OF_BASE_GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OBJ_X_BASE1]], i64 20
-; CHECK-NEXT:    [[OBJ_X_BASE_OF_BASE_OF_BASE_GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OBJ_X_BASE1]], i64 24
+; CHECK-NEXT:    [[OBJ_X_BASE1:%.*]] = select i1 [[C]], ptr addrspace(1) [[OBJ1]], ptr addrspace(1) [[OBJ2]], !is_base_value [[META0]]
+; CHECK-NEXT:    [[OBJ_X_BASE_GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[OBJ_X_BASE1]], i64 8
+; CHECK-NEXT:    [[OBJ_X_BASE_OF_BASE_GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[OBJ_X_BASE1]], i64 20
+; CHECK-NEXT:    [[OBJ_X_BASE_OF_BASE_OF_BASE_GEP:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[OBJ_X_BASE1]], i64 24
 ; CHECK-NEXT:    [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr nonnull elementtype(void ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "deopt"(ptr addrspace(1) [[OBJ_X_BASE1]], ptr addrspace(1) [[OBJ_X_BASE1]], ptr addrspace(1) [[OBJ_X_BASE1]], ptr addrspace(1) [[OBJ_X_BASE_GEP]], ptr addrspace(1) [[OBJ_X_BASE_OF_BASE_GEP]], ptr addrspace(1) [[OBJ_X_BASE_OF_BASE_OF_BASE_GEP]], ptr addrspace(1) [[OBJ_X_BASE1]], ptr addrspace(1) [[OBJ_X_BASE1]], ptr addrspace(1) [[OBJ_X_BASE1]], i64 0, i64 0, i64 0, i64 8, i64 20, i64 24, i64 0, i64 0, i64 0), "gc-live"(ptr addrspace(1) [[OBJ_X_BASE1]]) ]
 ; CHECK-NEXT:    [[OBJ_X_BASE1_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 0, i32 0)
 ; CHECK-NEXT:    ret ptr addrspace(1) [[OBJ_X_BASE1_RELOCATED]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index ec9c01c99256b0..4161078e0a41cb 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -16,8 +16,8 @@ target triple = "aarch64--linux-gnu"
 
 define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: @gather_multiple_use(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 15)
@@ -57,10 +57,10 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
 @data = global [6 x [258 x i8]] zeroinitializer, align 1
 define void @gather_load(ptr noalias %ptr) {
 ; CHECK-LABEL: @gather_load(
-; CHECK-NEXT:    [[ARRAYIDX182:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 2
-; CHECK-NEXT:    [[ARRAYIDX183:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 6
-; CHECK-NEXT:    [[ARRAYIDX185:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX182:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX183:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX185:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8
 ; CHECK-NEXT:    [[L0:%.*]] = load i8, ptr getelementptr inbounds (i8, ptr @data, i64 258), align 1
 ; CHECK-NEXT:    [[CONV150:%.*]] = zext i8 [[L0]] to i16
 ; CHECK-NEXT:    [[ADD152:%.*]] = add nuw nsw i16 [[CONV150]], 10

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
index 91c8db14a45aa1..c43b79e138a308 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -64,7 +64,7 @@ define i32 @getelementptr_4x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext nneg i32 [[TMP6]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP7]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[G:%.*]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[T6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[T6]], [[SUM_032]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP5]], i64 1
@@ -159,7 +159,7 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
 ; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i64 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[G:%.*]], i64 [[TMP5]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[G:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP3]], i64 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i64

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll
index 1cce52060c479f..5808d64d925c69 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr2.ll
@@ -25,8 +25,8 @@ define void @test_i16_extend(ptr %p.1, ptr %p.2, i32 %idx.i32) {
 ; CHECK-LABEL: @test_i16_extend(
 ; CHECK-NEXT:    [[P_0:%.*]] = load ptr, ptr @global, align 8
 ; CHECK-NEXT:    [[IDX_0:%.*]] = zext i32 [[IDX_I32:%.*]] to i64
-; CHECK-NEXT:    [[T53:%.*]] = getelementptr inbounds i16, ptr [[P_1:%.*]], i64 [[IDX_0]]
-; CHECK-NEXT:    [[T56:%.*]] = getelementptr inbounds i16, ptr [[P_2:%.*]], i64 [[IDX_0]]
+; CHECK-NEXT:    [[T53:%.*]] = getelementptr inbounds nuw i16, ptr [[P_1:%.*]], i64 [[IDX_0]]
+; CHECK-NEXT:    [[T56:%.*]] = getelementptr inbounds nuw i16, ptr [[P_2:%.*]], i64 [[IDX_0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[T53]], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i16> [[TMP1]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr [[T56]], align 2

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 7622f9bc5c41d9..9ce79e5ea356b9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -133,7 +133,7 @@ define i16 @reduce_blockstrided2(ptr nocapture noundef readonly %x, ptr nocaptur
 ; CHECK-LABEL: @reduce_blockstrided2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[X:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i64 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM]]
@@ -165,7 +165,7 @@ define i16 @reduce_blockstrided2(ptr nocapture noundef readonly %x, ptr nocaptur
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM15]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[ARRAYIDX32]], align 2
-; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, ptr [[Y]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds nuw i8, ptr [[Y]], i64 2
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[ARRAYIDX33]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM4]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX36]], align 2
@@ -254,9 +254,9 @@ define i16 @reduce_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
 ; CHECK-LABEL: @reduce_blockstrided3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[L0:%.*]] = load i16, ptr [[X:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i64 2
 ; CHECK-NEXT:    [[L1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 2
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[X]], i64 4
 ; CHECK-NEXT:    [[L2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM]]
@@ -270,9 +270,9 @@ define i16 @reduce_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
 ; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[IDXPROM9]]
 ; CHECK-NEXT:    [[L6:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
 ; CHECK-NEXT:    [[L8:%.*]] = load i16, ptr [[Y:%.*]], align 2
-; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, ptr [[Y]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[Y]], i64 2
 ; CHECK-NEXT:    [[L9:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2
-; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, ptr [[Y]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds nuw i8, ptr [[Y]], i64 4
 ; CHECK-NEXT:    [[L10:%.*]] = load i16, ptr [[ARRAYIDX16]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[Y]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    [[L12:%.*]] = load i16, ptr [[ARRAYIDX20]], align 2
@@ -414,12 +414,12 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64
 ; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P2:%.*]], i64 4
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
@@ -677,7 +677,7 @@ entry:
 define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocapture noundef readonly %y, ptr nocapture noundef writeonly %z, i32 noundef %stride) {
 ; CHECK-LABEL: @store_blockstrided3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[X:%.*]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[X:%.*]], i64 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[STRIDE:%.*]], 1
 ; CHECK-NEXT:    [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
@@ -696,7 +696,7 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
 ; CHECK-NEXT:    [[ADD26:%.*]] = add nsw i32 [[MUL21]], 1
 ; CHECK-NEXT:    [[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM27]]
-; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 8
+; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds nuw i8, ptr [[Y:%.*]], i64 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX35]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM5]]
 ; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM11]]
@@ -705,9 +705,9 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
 ; CHECK-NEXT:    [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM23]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX60]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 [[IDXPROM27]]
-; CHECK-NEXT:    [[ARRAYIDX72:%.*]] = getelementptr inbounds i8, ptr [[Z:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX72:%.*]] = getelementptr inbounds nuw i8, ptr [[Z:%.*]], i64 4
 ; CHECK-NEXT:    [[MUL73:%.*]] = mul nsw i32 [[TMP3]], [[TMP0]]
-; CHECK-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 24
+; CHECK-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 24
 ; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x i32>, ptr [[X]], align 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i32>, ptr [[Y]], align 4
@@ -716,14 +716,14 @@ define void @store_blockstrided3(ptr nocapture noundef readonly %x, ptr nocaptur
 ; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw <2 x i32> [[TMP9]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 ; CHECK-NEXT:    [[MUL81:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
-; CHECK-NEXT:    [[ARRAYIDX82:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 32
+; CHECK-NEXT:    [[ARRAYIDX82:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 32
 ; CHECK-NEXT:    [[TMP13:%.*]] = load <2 x i32>, ptr [[ARRAYIDX16]], align 4
 ; CHECK-NEXT:    [[TMP14:%.*]] = load <2 x i32>, ptr [[ARRAYIDX52]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP14]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]]
-; CHECK-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 44
-; CHECK-NEXT:    [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, ptr [[Z]], i64 36
+; CHECK-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 44
+; CHECK-NEXT:    [[ARRAYIDX92:%.*]] = getelementptr inbounds nuw i8, ptr [[Z]], i64 36
 ; CHECK-NEXT:    [[TMP17:%.*]] = load <2 x i32>, ptr [[ARRAYIDX28]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = load <2 x i32>, ptr [[ARRAYIDX64]], align 4
 ; CHECK-NEXT:    store i32 [[MUL73]], ptr [[Z]], align 4
@@ -915,15 +915,15 @@ define void @store_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64
 ; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P2:%.*]], i64 4
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
-; CHECK-NEXT:    [[DST4:%.*]] = getelementptr inbounds i8, ptr [[DST0:%.*]], i64 16
-; CHECK-NEXT:    [[DST8:%.*]] = getelementptr inbounds i8, ptr [[DST0]], i64 32
-; CHECK-NEXT:    [[DST12:%.*]] = getelementptr inbounds i8, ptr [[DST0]], i64 48
+; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64]], i64 4
+; CHECK-NEXT:    [[DST4:%.*]] = getelementptr inbounds nuw i8, ptr [[DST0:%.*]], i64 16
+; CHECK-NEXT:    [[DST8:%.*]] = getelementptr inbounds nuw i8, ptr [[DST0]], i64 32
+; CHECK-NEXT:    [[DST12:%.*]] = getelementptr inbounds nuw i8, ptr [[DST0]], i64 48
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
@@ -1201,20 +1201,20 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST1:%.*]] to i64
 ; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[ST2:%.*]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P1:%.*]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1:%.*]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P2:%.*]], i64 4
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64]], i64 4
 ; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_1]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_1]], i64 4
 ; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]]
-; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 4
-; CHECK-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR_2]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[ADD_PTR64_2]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1

diff  --git a/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll b/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
index 1c408f19a8a324..7faf11c34d76e3 100644
--- a/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/WebAssembly/no-vectorize-rotate.ll
@@ -18,7 +18,7 @@ define void @foo(<2 x i64> %x, <4 x i32> %y, ptr %out) #0 {
 ; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x i32> [[Y]], i64 3
 ; CHECK-NEXT:    [[CONV17:%.*]] = zext i32 [[E]] to i64
 ; CHECK-NEXT:    [[F:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[D]], i64 [[D]], i64 [[CONV17]])
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i32 8
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i32 8
 ; CHECK-NEXT:    store i64 [[F]], ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
index b4cbdfa10460f6..9c9a89ac76d8be 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
@@ -22,10 +22,10 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; SSE-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1)
 ; SSE-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
 ; SSE-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
-; SSE-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
+; SSE-NEXT:    [[T4:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 [[TMP4]]
 ; SSE-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
 ; SSE-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
-; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
+; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 [[TMP6]]
 ; SSE-NEXT:    [[T6:%.*]] = load i8, ptr [[T4]], align 1
 ; SSE-NEXT:    [[T7:%.*]] = load i8, ptr [[T5]], align 1
 ; SSE-NEXT:    [[T8:%.*]] = add i8 [[T6]], [[T7]]
@@ -38,10 +38,10 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; AVX-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1)
 ; AVX-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
 ; AVX-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
-; AVX-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
+; AVX-NEXT:    [[T4:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 [[TMP4]]
 ; AVX-NEXT:    [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i64 1
 ; AVX-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i64
-; AVX-NEXT:    [[T5:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[TMP6]]
+; AVX-NEXT:    [[T5:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 [[TMP6]]
 ; AVX-NEXT:    [[T6:%.*]] = load i8, ptr [[T4]], align 1
 ; AVX-NEXT:    [[T7:%.*]] = load i8, ptr [[T5]], align 1
 ; AVX-NEXT:    [[T8:%.*]] = add i8 [[T6]], [[T7]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
index 95da098b8b1435..bfe93dc7f88b7f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
@@ -21,13 +21,13 @@ define void @test1(ptr %a, ptr %b, ptr %c) {
 ; NOSLP-NEXT:    [[I0:%.*]] = load double, ptr [[A:%.*]], align 8
 ; NOSLP-NEXT:    [[I1:%.*]] = load double, ptr [[B:%.*]], align 8
 ; NOSLP-NEXT:    [[MUL:%.*]] = fmul double [[I0]], [[I1]]
-; NOSLP-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
+; NOSLP-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 8
 ; NOSLP-NEXT:    [[I3:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
-; NOSLP-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 8
+; NOSLP-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 8
 ; NOSLP-NEXT:    [[I4:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
 ; NOSLP-NEXT:    [[MUL5:%.*]] = fmul double [[I3]], [[I4]]
 ; NOSLP-NEXT:    store double [[MUL]], ptr [[C:%.*]], align 8
-; NOSLP-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 8
+; NOSLP-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 8
 ; NOSLP-NEXT:    store double [[MUL5]], ptr [[ARRAYIDX5]], align 8
 ; NOSLP-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
index 6001c659a37b75..925c334cb5f20a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll
@@ -7,11 +7,11 @@
 
 define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -23,11 +23,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -39,11 +39,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -55,11 +55,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -71,11 +71,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512VL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX512VL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -107,35 +107,35 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 
 define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_2(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4
 ; SSE-NEXT:    store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 ; SSE-NEXT:    store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
 ; SSE-NEXT:    store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; SSE-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
 ; SSE-NEXT:    store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_2(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -146,13 +146,13 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_2(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -163,13 +163,13 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_2(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX512F-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX512F-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -180,13 +180,13 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_2(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512VL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX512VL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX512VL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX512VL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX512VL-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -223,39 +223,39 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; SSE-LABEL: @gather_load_3(
 ; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 1
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4
 ; SSE-NEXT:    store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 2
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 ; SSE-NEXT:    store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; SSE-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], 3
-; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
+; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
 ; SSE-NEXT:    store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
 ; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP16:%.*]] = add i32 [[TMP15]], 4
-; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
+; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
 ; SSE-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP20:%.*]] = add i32 [[TMP19]], 1
-; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 20
+; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 20
 ; SSE-NEXT:    store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
+; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
 ; SSE-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP24:%.*]] = add i32 [[TMP23]], 2
-; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
+; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 24
 ; SSE-NEXT:    store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
+; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
 ; SSE-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 3
-; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 28
+; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 28
 ; SSE-NEXT:    store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
 ; SSE-NEXT:    [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP32:%.*]] = add i32 [[TMP31]], 4
 ; SSE-NEXT:    store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
@@ -263,19 +263,19 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 ;
 ; AVX-LABEL: @gather_load_3(
 ; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
+; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
 ; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
+; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
 ; AVX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
+; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
 ; AVX-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
+; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
 ; AVX-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
+; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
 ; AVX-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
 ; AVX-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
@@ -291,19 +291,19 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 ;
 ; AVX2-LABEL: @gather_load_3(
 ; AVX2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
 ; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
 ; AVX2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
 ; AVX2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
 ; AVX2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
+; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
 ; AVX2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
 ; AVX2-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
@@ -378,20 +378,20 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 
 define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
 ; SSE-LABEL: @gather_load_4(
-; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds i8, ptr [[T0:%.*]], i64 4
-; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
-; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 8
-; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
-; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 12
-; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
-; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 16
-; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
-; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 20
-; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
-; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 24
-; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
-; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 28
-; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
+; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds nuw i8, ptr [[T0:%.*]], i64 4
+; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
+; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 8
+; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
+; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 12
+; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
+; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 16
+; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
+; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 20
+; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
+; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 24
+; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
+; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 28
+; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
 ; SSE-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
@@ -419,13 +419,13 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_4(
-; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
-; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
-; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
-; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
-; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
-; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
-; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
+; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
+; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
+; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
+; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
+; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
+; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
+; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
 ; AVX-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
@@ -447,13 +447,13 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_4(
-; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
-; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
-; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
-; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
-; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
-; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
-; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
+; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
+; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
+; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
+; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
+; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
+; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
+; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
 ; AVX2-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
@@ -541,12 +541,12 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
 define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_div(
 ; SSE-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
+; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
 ; SSE-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 16
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 16
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
@@ -562,21 +562,21 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
 ; SSE-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i64 3
 ; SSE-NEXT:    [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]]
 ; SSE-NEXT:    store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
+; SSE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
 ; SSE-NEXT:    [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
+; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
 ; SSE-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
 ; SSE-NEXT:    [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
 ; SSE-NEXT:    [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; SSE-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
+; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
 ; SSE-NEXT:    [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
+; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
 ; SSE-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
+; SSE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
 ; SSE-NEXT:    [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i64 0
 ; SSE-NEXT:    [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i64 1
@@ -592,26 +592,26 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
 ;
 ; AVX-LABEL: @gather_load_div(
 ; AVX-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
 ; AVX-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
 ; AVX-NEXT:    [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
+; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
 ; AVX-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
 ; AVX-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
+; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
 ; AVX-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
+; AVX-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
 ; AVX-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
+; AVX-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
 ; AVX-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
+; AVX-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
 ; AVX-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
@@ -640,26 +640,26 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
 ;
 ; AVX2-LABEL: @gather_load_div(
 ; AVX2-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
 ; AVX2-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
 ; AVX2-NEXT:    [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
+; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
 ; AVX2-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
 ; AVX2-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
+; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
 ; AVX2-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX2-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
+; AVX2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
 ; AVX2-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
+; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
 ; AVX2-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
+; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
 ; AVX2-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
index b1942bfe073f2c..dc1ba4ec7e7ab3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll
@@ -7,11 +7,11 @@
 
 define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -23,11 +23,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -39,11 +39,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -55,11 +55,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -71,11 +71,11 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512VL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX512VL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
@@ -107,35 +107,35 @@ define void @gather_load(ptr noalias nocapture %0, ptr noalias nocapture readonl
 
 define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_2(
-; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP5:%.*]] = add nsw i32 [[TMP4]], 1
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4
 ; SSE-NEXT:    store i32 [[TMP5]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; SSE-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP8]], 2
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 ; SSE-NEXT:    store i32 [[TMP9]], ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; SSE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; SSE-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP13:%.*]] = add nsw i32 [[TMP12]], 3
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
 ; SSE-NEXT:    store i32 [[TMP13]], ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; SSE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; SSE-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 4
 ; SSE-NEXT:    store i32 [[TMP17]], ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_2(
-; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -146,13 +146,13 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_2(
-; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX2-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX2-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX2-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -163,13 +163,13 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; AVX2-NEXT:    ret void
 ;
 ; AVX512F-LABEL: @gather_load_2(
-; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512F-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512F-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX512F-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX512F-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX512F-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX512F-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX512F-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX512F-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX512F-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX512F-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX512F-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -180,13 +180,13 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; AVX512F-NEXT:    ret void
 ;
 ; AVX512VL-LABEL: @gather_load_2(
-; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4
+; AVX512VL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1:%.*]], i64 4
 ; AVX512VL-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
+; AVX512VL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
 ; AVX512VL-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
+; AVX512VL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
 ; AVX512VL-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX512VL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX512VL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX512VL-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
 ; AVX512VL-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
 ; AVX512VL-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1
@@ -223,39 +223,39 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 ; SSE-LABEL: @gather_load_3(
 ; SSE-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], 1
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 4
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 4
 ; SSE-NEXT:    store i32 [[TMP4]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; SSE-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 2
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 8
 ; SSE-NEXT:    store i32 [[TMP8]], ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; SSE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; SSE-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], 3
-; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
+; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 12
 ; SSE-NEXT:    store i32 [[TMP12]], ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
+; SSE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
 ; SSE-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP16:%.*]] = add i32 [[TMP15]], 4
-; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
+; SSE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    store i32 [[TMP16]], ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
+; SSE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
 ; SSE-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP20:%.*]] = add i32 [[TMP19]], 1
-; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 20
+; SSE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 20
 ; SSE-NEXT:    store i32 [[TMP20]], ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
+; SSE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
 ; SSE-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP24:%.*]] = add i32 [[TMP23]], 2
-; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
+; SSE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 24
 ; SSE-NEXT:    store i32 [[TMP24]], ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
+; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
 ; SSE-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP28:%.*]] = add i32 [[TMP27]], 3
-; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 28
+; SSE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 28
 ; SSE-NEXT:    store i32 [[TMP28]], ptr [[TMP25]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
 ; SSE-NEXT:    [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP32:%.*]] = add i32 [[TMP31]], 4
 ; SSE-NEXT:    store i32 [[TMP32]], ptr [[TMP29]], align 4, !tbaa [[TBAA0]]
@@ -263,19 +263,19 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 ;
 ; AVX-LABEL: @gather_load_3(
 ; AVX-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
+; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
 ; AVX-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
+; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
 ; AVX-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
+; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
 ; AVX-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
+; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
 ; AVX-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
+; AVX-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
 ; AVX-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
 ; AVX-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
@@ -291,19 +291,19 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 ;
 ; AVX2-LABEL: @gather_load_3(
 ; AVX2-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 44
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 44
 ; AVX2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; AVX2-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 60
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 60
 ; AVX2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 72
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 72
 ; AVX2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 36
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 36
 ; AVX2-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 24
 ; AVX2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 84
+; AVX2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 84
 ; AVX2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP18:%.*]] = insertelement <8 x i32> poison, i32 [[TMP3]], i64 0
 ; AVX2-NEXT:    [[TMP19:%.*]] = insertelement <8 x i32> [[TMP18]], i32 [[TMP5]], i64 1
@@ -378,20 +378,20 @@ define void @gather_load_3(ptr noalias nocapture %0, ptr noalias nocapture reado
 
 define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
 ; SSE-LABEL: @gather_load_4(
-; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds i8, ptr [[T0:%.*]], i64 4
-; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
-; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 8
-; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
-; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 12
-; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
-; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 16
-; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
-; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 20
-; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
-; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 24
-; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
-; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds i8, ptr [[T0]], i64 28
-; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
+; SSE-NEXT:    [[T5:%.*]] = getelementptr inbounds nuw i8, ptr [[T0:%.*]], i64 4
+; SSE-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
+; SSE-NEXT:    [[T9:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 8
+; SSE-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
+; SSE-NEXT:    [[T13:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 12
+; SSE-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
+; SSE-NEXT:    [[T17:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 16
+; SSE-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
+; SSE-NEXT:    [[T21:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 20
+; SSE-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
+; SSE-NEXT:    [[T25:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 24
+; SSE-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
+; SSE-NEXT:    [[T29:%.*]] = getelementptr inbounds nuw i8, ptr [[T0]], i64 28
+; SSE-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
 ; SSE-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
@@ -419,13 +419,13 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @gather_load_4(
-; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
-; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
-; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
-; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
-; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
-; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
-; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
+; AVX-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
+; AVX-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
+; AVX-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
+; AVX-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
+; AVX-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
+; AVX-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
+; AVX-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
 ; AVX-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
@@ -447,13 +447,13 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
 ; AVX-NEXT:    ret void
 ;
 ; AVX2-LABEL: @gather_load_4(
-; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds i8, ptr [[T1:%.*]], i64 44
-; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 16
-; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 60
-; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 72
-; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 36
-; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 24
-; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds i8, ptr [[T1]], i64 84
+; AVX2-NEXT:    [[T6:%.*]] = getelementptr inbounds nuw i8, ptr [[T1:%.*]], i64 44
+; AVX2-NEXT:    [[T10:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 16
+; AVX2-NEXT:    [[T14:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 60
+; AVX2-NEXT:    [[T18:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 72
+; AVX2-NEXT:    [[T22:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 36
+; AVX2-NEXT:    [[T26:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 24
+; AVX2-NEXT:    [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[T1]], i64 84
 ; AVX2-NEXT:    [[T3:%.*]] = load i32, ptr [[T1]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[T7:%.*]] = load i32, ptr [[T6]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[T11:%.*]] = load i32, ptr [[T10]], align 4, !tbaa [[TBAA0]]
@@ -541,12 +541,12 @@ define void @gather_load_4(ptr noalias nocapture %t0, ptr noalias nocapture read
 define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture readonly %1) {
 ; SSE-LABEL: @gather_load_div(
 ; SSE-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
-; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
-; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
-; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
+; SSE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
+; SSE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; SSE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
 ; SSE-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0:%.*]], i64 16
+; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0:%.*]], i64 16
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP11:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP12:%.*]] = load <2 x float>, ptr [[TMP5]], align 4, !tbaa [[TBAA0]]
@@ -562,21 +562,21 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
 ; SSE-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP8]], i64 3
 ; SSE-NEXT:    [[TMP23:%.*]] = fdiv <4 x float> [[TMP19]], [[TMP22]]
 ; SSE-NEXT:    store <4 x float> [[TMP23]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
+; SSE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
 ; SSE-NEXT:    [[TMP25:%.*]] = load float, ptr [[TMP24]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
+; SSE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
 ; SSE-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; SSE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
 ; SSE-NEXT:    [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
+; SSE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
 ; SSE-NEXT:    [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; SSE-NEXT:    [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; SSE-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
+; SSE-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
 ; SSE-NEXT:    [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
+; SSE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
 ; SSE-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4, !tbaa [[TBAA0]]
-; SSE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
+; SSE-NEXT:    [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
 ; SSE-NEXT:    [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4, !tbaa [[TBAA0]]
 ; SSE-NEXT:    [[TMP40:%.*]] = insertelement <4 x float> poison, float [[TMP25]], i64 0
 ; SSE-NEXT:    [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP29]], i64 1
@@ -592,26 +592,26 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
 ;
 ; AVX-LABEL: @gather_load_div(
 ; AVX-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
+; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
+; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
 ; AVX-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
+; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
 ; AVX-NEXT:    [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
+; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
 ; AVX-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; AVX-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
 ; AVX-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
+; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
 ; AVX-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
+; AVX-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
 ; AVX-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
+; AVX-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
 ; AVX-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
-; AVX-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
+; AVX-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
 ; AVX-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX-NEXT:    [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]
@@ -640,26 +640,26 @@ define void @gather_load_div(ptr noalias nocapture %0, ptr noalias nocapture rea
 ;
 ; AVX2-LABEL: @gather_load_div(
 ; AVX2-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP1:%.*]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40
-; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 52
-; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
-; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 176
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 52
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 12
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 176
 ; AVX2-NEXT:    [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 68
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 68
 ; AVX2-NEXT:    [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 132
+; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 132
 ; AVX2-NEXT:    [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 32
 ; AVX2-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP13]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 120
+; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 120
 ; AVX2-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20
+; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 20
 ; AVX2-NEXT:    [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 108
+; AVX2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 108
 ; AVX2-NEXT:    [[TMP20:%.*]] = load float, ptr [[TMP19]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 80
+; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 80
 ; AVX2-NEXT:    [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4, !tbaa [[TBAA0]]
-; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 92
+; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 92
 ; AVX2-NEXT:    [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP25:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA0]]
 ; AVX2-NEXT:    [[TMP26:%.*]] = load <2 x float>, ptr [[TMP4]], align 4, !tbaa [[TBAA0]]

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
index d1c15306a72d08..ff1e165c8c54a2 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll
@@ -18,9 +18,9 @@
 define dso_local void @merge(ptr nocapture readonly %params) local_unnamed_addr align 2 {
 ; CHECK-LABEL: @merge(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SAVEPAIRLISTS3:%.*]] = getelementptr inbounds i8, ptr [[PARAMS:%.*]], i64 144
+; CHECK-NEXT:    [[SAVEPAIRLISTS3:%.*]] = getelementptr inbounds nuw i8, ptr [[PARAMS:%.*]], i64 144
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SAVEPAIRLISTS3]], align 8
-; CHECK-NEXT:    [[USEPAIRLISTS4:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i64 148
+; CHECK-NEXT:    [[USEPAIRLISTS4:%.*]] = getelementptr inbounds nuw i8, ptr [[PARAMS]], i64 148
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[USEPAIRLISTS4]], align 4
 ; CHECK-NEXT:    [[TOBOOL54_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL54_NOT]], label [[LOR_LHS_FALSE55:%.*]], label [[IF_END109:%.*]]
@@ -37,7 +37,7 @@ define dso_local void @merge(ptr nocapture readonly %params) local_unnamed_addr
 ; CHECK:       if.then117:
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.then138:
-; CHECK-NEXT:    [[DOTIN:%.*]] = getelementptr inbounds i8, ptr [[PARAMS]], i64 172
+; CHECK-NEXT:    [[DOTIN:%.*]] = getelementptr inbounds nuw i8, ptr [[PARAMS]], i64 172
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DOTIN]], align 4
 ; CHECK-NEXT:    [[TOBOOL139_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL139_NOT]], label [[IF_ELSE147:%.*]], label [[IF_THEN140:%.*]]
@@ -89,9 +89,9 @@ if.else147:                                       ; preds = %if.then138
 ;; Check the last store is deleted.
 define i32 @load(ptr nocapture %a, ptr nocapture %b) {
 ; CHECK-LABEL: @load(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 4
 ; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
 ; CHECK-NEXT:    ret i32 [[TMP2]]

diff  --git a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
index 2069efd12d27af..2199d6a07981a1 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
@@ -35,7 +35,7 @@ define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x)
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[I_07]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]]
 ; CHECK-NEXT:    store i32 [[I_07]], ptr addrspace(1) [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:

diff  --git a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
index b3b6abe314e6fc..349a18148460f4 100644
--- a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
@@ -13,7 +13,7 @@ define i32 @foo(i32 %x) #0 section ".tcm_text" {
 ; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 6
 ; ENABLE-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; ENABLE:       switch.lookup:
-; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.foo, i32 0, i32 [[X]]
+; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds nuw [6 x i32], ptr @switch.table.foo, i32 0, i32 [[X]]
 ; ENABLE-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
 ; ENABLE-NEXT:    br label [[RETURN]]
 ; ENABLE:       return:


        


More information about the cfe-commits mailing list