[llvm] [InstCombine] Retain inbounds when canonicalising add+gep (PR #72244)

via llvm-commits <llvm-commits at lists.llvm.org>
Tue Nov 14 03:44:35 PST 2023


https://github.com/john-brawn-arm created https://github.com/llvm/llvm-project/pull/72244

When canonicalising add+gep to gep+gep, retain the inbounds qualifier when we can prove that both of the resulting geps will be inbounds. This is done by checking that one index is a loop induction variable that starts at 0 and that the other index is the same in every loop iteration.
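
For illustration, a minimal before/after sketch of the transformation (mirroring test1 in the new add-gep.ll test), where %i is the induction variable of a loop starting at 0 and %k is loop-invariant:

  ; before: one inbounds gep of the sum
  %add = add i32 %i, %k
  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add

  ; after: the invariant index first, then the induction variable,
  ; with inbounds retained on both geps
  %0 = getelementptr inbounds i8, ptr %A, i32 %k
  %arrayidx = getelementptr inbounds i8, ptr %0, i32 %i

The iteration with %i == 0 accesses %A[%k] directly, so %A[%k] must itself be inbounds, and each later iteration steps forward from it.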

From 1118b6c46fc5541d31133b8ae25b6e34adaab6b8 Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Mon, 6 Nov 2023 17:13:28 +0000
Subject: [PATCH] [InstCombine] Retain inbounds when canonicalising add+gep

When canonicalising add+gep to gep+gep, retain the inbounds qualifier
when we can prove that both of the geps will be inbounds. This is done
by checking that one index is a loop induction variable that starts at
0 and that the other index is the same in every loop iteration.
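
The invariant index may also be a binary operation of values that
dominate the loop, for example (as in test2 of the new add-gep.ll
test):

  %mul = mul i32 %k, 42
  %add = add i32 %i, %mul
  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add

which becomes two inbounds geps indexed by %mul and %i respectively.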
---
 .../InstCombine/InstructionCombining.cpp      |  49 +++-
 llvm/test/Transforms/InstCombine/add-gep.ll   | 228 ++++++++++++++++++
 .../InstCombine/mem-par-metadata-memcpy.ll    |   4 +-
 .../Transforms/LoopVectorize/induction.ll     |  24 +-
 .../invariant-store-vectorization.ll          |  40 +--
 .../Transforms/LoopVectorize/runtime-check.ll |  20 +-
 6 files changed, 317 insertions(+), 48 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/add-gep.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 16de4e3ffe1f51a..b921f49e44c1c49 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2334,10 +2334,51 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // as:
       //   %newptr = getelementptr i32, i32* %ptr, i64 %idx1
       //   %newgep = getelementptr i32, i32* %newptr, i64 %idx2
-      auto *NewPtr = Builder.CreateGEP(GEP.getResultElementType(),
-                                       GEP.getPointerOperand(), Idx1);
-      return GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr,
-                                       Idx2);
+      // If %gep is inbounds then %newgep can be inbounds only if %newptr is
+      // inbounds as well, because an inbounds gep requires its base pointer
+      // to be inbounds. We can mark %newptr as inbounds if we have a loop like
+      //   for (i = 0; ...)
+      //     ptr[i+x]
+      // If x is the same in each loop iteration, then we have a series of
+      // geps starting with ptr[x], which means that ptr[x] itself must be
+      // inbounds.
+      auto CheckIdx = [&](Value *LoopIdx, Value *FixedIdx) {
+        // Check that LoopIdx is a loop induction variable that starts at 0.
+        auto *PHI = dyn_cast<PHINode>(LoopIdx);
+        BinaryOperator *BO;
+        Value *Start, *End;
+        if (!PHI || !matchSimpleRecurrence(PHI, BO, Start, End) ||
+            !match(Start, m_Zero()))
+          return false;
+        // If FixedIdx dominates the phi then it's the same in each loop
+        // iteration.
+        if (DT.dominates(FixedIdx, PHI))
+          return true;
+        // If FixedIdx is a binary expression of values that dominate the phi
+        // then it's the same in each loop iteration.
+        Value *Left, *Right;
+        if (match(FixedIdx, m_BinOp(m_Value(Left), m_Value(Right))) &&
+            DT.dominates(Left, PHI) && DT.dominates(Right, PHI))
+          return true;
+        // We can't handle anything else.
+        return false;
+      };
+      bool InBounds = false;
+      if (GEP.isInBounds()) {
+        if (CheckIdx(Idx2, Idx1)) {
+          InBounds = true;
+        } else if (CheckIdx(Idx1, Idx2)) {
+          std::swap(Idx1, Idx2);
+          InBounds = true;
+        }
+      }
+      auto *NewPtr =
+          Builder.CreateGEP(GEP.getResultElementType(), GEP.getPointerOperand(),
+                            Idx1, "", InBounds);
+      auto *NewGEP =
+          GetElementPtrInst::Create(GEP.getResultElementType(), NewPtr, Idx2);
+      NewGEP->setIsInBounds(InBounds);
+      return NewGEP;
     }
   }
 
diff --git a/llvm/test/Transforms/InstCombine/add-gep.ll b/llvm/test/Transforms/InstCombine/add-gep.ll
new file mode 100644
index 000000000000000..c33782aadf8e296
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/add-gep.ll
@@ -0,0 +1,228 @@
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK-LABEL: @test1
+define void @test1(i32 %N, i32 %k, ptr %A) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i32 %i, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: for.body:
+; CHECK:      [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %k
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %i
+for.body:
+  %add = add i32 %i, %k
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
+  store i8 1, ptr %arrayidx, align 4
+  %inc = add i32 %i, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test2
+define void @test2(i32 %N, i32 %k, ptr %A) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i32 %i, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: for.body:
+; CHECK:      [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %mul
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %i
+for.body:
+  %mul = mul i32 %k, 42
+  %add = add i32 %i, %mul
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
+  store i8 1, ptr %arrayidx, align 4
+  %inc = add i32 %i, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test3
+define void @test3(i32 %N, ptr %A, i32 %val) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ]
+  %cmp = icmp ult i32 %i, %N
+  br i1 %cmp, label %for.body, label %for.end7
+
+for.body:
+  br label %for.cond1
+
+for.cond1:
+  %j = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
+  %cmp2 = icmp ult i32 %j, %N
+  br i1 %cmp2, label %for.body3, label %for.inc5
+
+; CHECK-LABEL: for.body3:
+; CHECK:      [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %i
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %j
+for.body3:
+  %add = add i32 %i, %j
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
+  store i8 1, ptr %arrayidx, align 4
+  %inc = add i32 %j, 1
+  br label %for.cond1
+
+for.inc5:
+  %inc6 = add i32 %i, 1
+  br label %for.cond
+
+for.end7:
+  ret void
+}
+
+; CHECK-LABEL: @test4
+define void @test4(i32 %N, ptr %A, i32 %val) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ]
+  %cmp = icmp ult i32 %i, %N
+  br i1 %cmp, label %for.body, label %for.end7
+
+for.body:
+  br label %for.cond1
+
+for.cond1:
+  %j = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
+  %cmp2 = icmp ult i32 %j, %N
+  br i1 %cmp2, label %for.body3, label %for.inc5
+
+; CHECK-LABEL: for.body3:
+; CHECK:      [[GEP:%.*]] = getelementptr inbounds i8, ptr %A, i32 %mul
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr [[GEP]], i32 %j
+for.body3:
+  %mul = mul i32 %i, %N
+  %add = add i32 %mul, %j
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
+  store i8 1, ptr %arrayidx, align 4
+  %inc = add i32 %j, 1
+  br label %for.cond1
+
+for.inc5:
+  %inc6 = add i32 %i, 1
+  br label %for.cond
+
+for.end7:
+  ret void
+}
+
+; We can't use inbounds here because the add operand doesn't dominate the loop
+; CHECK-LABEL: @test5
+define void @test5(i32 %N, ptr %A, ptr %B) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i32 %i, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: for.body:
+; CHECK:      [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %i
+; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %0
+for.body:
+  %0 = load i32, ptr %B, align 4
+  %add = add i32 %i, %0
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
+  store i8 1, ptr %arrayidx, align 4
+  %inc = add i32 %i, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+; We can't use inbounds here because we don't have a loop
+; CHECK-LABEL: @test6
+define void @test6(i32 %k, i32 %j, ptr %A) {
+entry:
+  %cmp = icmp ugt i32 %k, 10
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %if.end
+
+if.else:
+  br label %if.end
+
+; CHECK-LABEL: if.end:
+; CHECK:      [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %val
+; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %j
+if.end:
+  %val = phi i32 [ 0, %if.then ], [ 1, %if.else ]
+  %add = add i32 %val, %j
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
+  store i8 1, ptr %arrayidx, align 4
+  ret void
+}
+
+; An inbounds gep would be invalid because of potential overflow in the add.
+; We also don't convert to gep+gep here, as an explicit sext is inserted
+; rather than using an i16 gep offset.
+; CHECK-LABEL: @test7
+define void @test7(i16 %N, i16 %k, ptr %A) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i = phi i16 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i16 %i, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: for.body:
+; CHECK:      %add = add i16 %i, %k
+; CHECK-NEXT: [[SEXT:%.*]] = sext i16 %add to i32
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i8, ptr %A, i32 [[SEXT]]
+for.body:
+  %add = add i16 %i, %k
+  %arrayidx = getelementptr inbounds i8, ptr %A, i16 %add
+  store i8 1, ptr %arrayidx, align 4
+  %inc = add i16 %i, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+; %i starts at 1, so we can't use inbounds
+; CHECK-LABEL: @test8
+define void @test8(i32 %N, i32 %k, ptr %A) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [ 1, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ult i32 %i, %N
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: for.body:
+; CHECK:      [[GEP:%.*]] = getelementptr i8, ptr %A, i32 %i
+; CHECK-NEXT: %arrayidx = getelementptr i8, ptr [[GEP]], i32 %k
+for.body:
+  %add = add i32 %i, %k
+  %arrayidx = getelementptr inbounds i8, ptr %A, i32 %add
+  store i8 1, ptr %arrayidx, align 4
+  %inc = add i32 %i, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll b/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
index 7826611ecc16574..af629882965c29d 100644
--- a/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
+++ b/llvm/test/Transforms/InstCombine/mem-par-metadata-memcpy.ll
@@ -23,8 +23,8 @@ define void @_Z4testPcl(ptr %out, i64 %size) {
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[OUT:%.*]], i64 [[I_0]]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[I_0]]
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[SIZE]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i64 [[SIZE]]
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 [[I_0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX1]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
 ; CHECK-NEXT:    store i16 [[TMP1]], ptr [[ARRAYIDX]], align 1, !llvm.access.group [[ACC_GRP0]]
 ; CHECK-NEXT:    br label [[FOR_INC]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 2df55bdf89a00fe..ec89682305dce9b 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -348,11 +348,11 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IND:       vector.body:
 ; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IND-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; IND-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
+; IND-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
+; IND-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
 ; IND-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !alias.scope !4, !noalias !7
-; IND-NEXT:    [[TMP7:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; IND-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP7]], i64 [[OFFSET2]]
+; IND-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
+; IND-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[INDEX]]
 ; IND-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP8]], align 4, !alias.scope !7
 ; IND-NEXT:    [[TMP9:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
 ; IND-NEXT:    [[TMP10:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP9]]
@@ -408,13 +408,13 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
+; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
+; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP6]], align 4, !alias.scope !4, !noalias !7
 ; UNROLL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 2
 ; UNROLL-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP7]], align 4, !alias.scope !4, !noalias !7
-; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
+; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
+; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP9]], align 4, !alias.scope !7
 ; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 2
 ; UNROLL-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP10]], align 4, !alias.scope !7
@@ -551,13 +551,13 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
 ; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
+; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
+; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope !4, !noalias !7
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP7]], align 4, !alias.scope !4, !noalias !7
-; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[OFFSET2]]
+; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
+; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope !7
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 4
 ; INTERLEAVE-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP10]], align 4, !alias.scope !7
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 9e36649bcf73d6a..2b2044197df2fce 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -347,14 +347,14 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_END10:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader.preheader:
 ; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[VAR2:%.*]], i64 4
-; CHECK-NEXT:    [[INVARIANT_GEP5:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 4
+; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 4
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader:
 ; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
 ; CHECK-NEXT:    [[J_022:%.*]] = phi i32 [ [[J_1_LCSSA:%.*]], [[FOR_INC8]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV23]], 2
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[VAR1]], i64 [[TMP0]]
-; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP5]], i64 [[TMP0]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]]
 ; CHECK-NEXT:    br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]]
 ; CHECK:       for.body3.lr.ph:
@@ -377,43 +377,43 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
 ; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[SCEVGEP3]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP4]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[GEP6]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP2]], [[GEP]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP5]], 8589934588
 ; CHECK-NEXT:    [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX5_PROMOTED]], i64 0
-; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr i32, ptr [[VAR2]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[TMP1]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP12]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[GEP]], align 4, !alias.scope !23
-; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP14]] = add <4 x i32> [[TMP13]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP12]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4, !alias.scope !23
+; CHECK-NEXT:    [[TMP15:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP16]] = add <4 x i32> [[TMP15]], <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP14]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
-; CHECK-NEXT:    store i32 [[TMP16]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP16]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
+; CHECK-NEXT:    store i32 [[TMP18]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP5]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP1]], [[FOR_BODY3_LR_PH]] ], [ [[TMP1]], [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
-; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT:    [[TMP19]] = add nsw i32 [[ADD]], 1
-; CHECK-NEXT:    store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+; CHECK-NEXT:    [[TMP21]] = add nsw i32 [[ADD]], 1
+; CHECK-NEXT:    store i32 [[TMP21]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[ITR]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index 920ce9cef507751..cbb5227ca3b10c6 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -131,11 +131,11 @@ define void @test_runtime_check(ptr %a, float %b, i64 %offset, i64 %offset2, i64
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[TMP5]], i64 [[OFFSET]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope !15, !noalias !18
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP7]], i64 [[OFFSET2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope !18
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[TMP9]]
@@ -225,17 +225,17 @@ define void @test_runtime_check2(ptr %a, float %b, i64 %offset, i64 %offset2, i6
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[A:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[ARR_IDX:%.*]] = getelementptr float, ptr [[TMP0]], i64 [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET:%.*]]
+; CHECK-NEXT:    [[ARR_IDX:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[IV]]
 ; CHECK-NEXT:    [[L1:%.*]] = load float, ptr [[ARR_IDX]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT:    [[ARR_IDX2:%.*]] = getelementptr float, ptr [[TMP1]], i64 [[OFFSET2:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET2:%.*]]
+; CHECK-NEXT:    [[ARR_IDX2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[IV]]
 ; CHECK-NEXT:    [[L2:%.*]] = load float, ptr [[ARR_IDX2]], align 4
 ; CHECK-NEXT:    [[M:%.*]] = fmul fast float [[L2]], [[B:%.*]]
 ; CHECK-NEXT:    [[AD:%.*]] = fadd fast float [[L1]], [[M]]
 ; CHECK-NEXT:    store float [[AD]], ptr [[ARR_IDX]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[C:%.*]], i64 [[IV]]
-; CHECK-NEXT:    [[C_IDX:%.*]] = getelementptr float, ptr [[TMP2]], i64 -1
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 -1
+; CHECK-NEXT:    [[C_IDX:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 [[IV]]
 ; CHECK-NEXT:    [[LC:%.*]] = load float, ptr [[C_IDX]], align 4
 ; CHECK-NEXT:    [[VC:%.*]] = fadd float [[LC]], 1.000000e+00
 ; CHECK-NEXT:    [[C_IDX2:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[IV]]


