[llvm] 6c767ce - [InstCombine] Canonicalize GEP of GEP by swapping constant-indexed GEP to the back

William Huang via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 20 10:41:37 PDT 2022


Author: William Huang
Date: 2022-10-20T17:41:26Z
New Revision: 6c767cef5a1d21154730f18ec01d76b7f36a3610

URL: https://github.com/llvm/llvm-project/commit/6c767cef5a1d21154730f18ec01d76b7f36a3610
DIFF: https://github.com/llvm/llvm-project/commit/6c767cef5a1d21154730f18ec01d76b7f36a3610.diff

LOG: [InstCombine] Canonicalize GEP of GEP by swapping constant-indexed GEP to the back

Canonicalize GEP of GEP by swapping a GEP with a constant suffix of indices to the back (and a GEP whose indices are all constant to the back of that); this exposes more opportunities to merge constant-indexed GEPs. Exceptions: the swap is skipped if it would violate use-def relations or pessimize LICM.
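
For example (an illustrative IR sketch in the spirit of the updated
InstCombine tests such as @basic, not taken verbatim from them), the
constant-indexed GEP is swapped to the back; inbounds is dropped because the
intermediate pointer may no longer be in bounds after the swap:

    %1 = getelementptr inbounds i32, ptr %p, i64 1    ; constant index
    %2 = getelementptr inbounds i32, ptr %1, i64 %a   ; variable index
  =>
    %1 = getelementptr i32, ptr %p, i64 %a
    %2 = getelementptr i32, ptr %1, i64 1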

For constant-indexed GEPs of GEPs that cannot be merged directly, the pair is cast to i8* and then merged.
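
For instance, mirroring the @notDivisible test below (which uses a datalayout
where i24 is 8-bit aligned), neither GEP's byte offset is divisible by the
other's element size, so the pair is rewritten through i8:

    %1 = getelementptr inbounds i32, ptr %p, i64 1    ; +4 bytes
    %2 = getelementptr inbounds i24, ptr %1, i64 1    ; +3 bytes
  =>
    %2 = getelementptr inbounds i8, ptr %p, i64 7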

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D125845

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
    llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
    llvm/test/Transforms/InstCombine/shift.ll
    llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
    llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
    llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c75f22f78267e..4093a07096bbc 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1943,6 +1943,14 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
   if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
     return nullptr;
 
+  // LICM moves a GEP with constant indices to the front, while canonicalization
+  // swaps it to the back of a non-constant GEP. If both transformations are
+  // applicable, LICM takes priority because it generally provides the greater
+  // optimization: it reduces the instruction count in the loop body. Performing
+  // the canonicalization swap first would negate the LICM opportunity without
+  // necessarily reducing the instruction count.
+  bool ShouldCanonicalizeSwap = true;
+
   if (Src->getResultElementType() == GEP.getSourceElementType() &&
       Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
       Src->hasOneUse()) {
@@ -1952,6 +1960,12 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
     if (LI) {
       // Try to reassociate loop invariant GEP chains to enable LICM.
       if (Loop *L = LI->getLoopFor(GEP.getParent())) {
+        // If SO1 is invariant and GO1 is variant, they should not be swapped by
+        // canonicalization even if it can be applied, otherwise it triggers
+        // LICM swapping in the next iteration, causing an infinite loop.
+        if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1))
+          ShouldCanonicalizeSwap = false;
+
         // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
         // invariant: this breaks the dependence between GEPs and allows LICM
         // to hoist the invariant part out of the loop.
@@ -1976,12 +1990,31 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
     }
   }
 
-  // Note that if our source is a gep chain itself then we wait for that
-  // chain to be resolved before we perform this transformation.  This
-  // avoids us creating a TON of code in some cases.
-  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
-    if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
-      return nullptr;   // Wait until our source is folded to completion.
+  // Canonicalization swap. Swap a GEP with a constant-index suffix to the back
+  // if doing so does not violate def-use relations or contradict the
+  // loop-invariant swap above. This enables more of the constant-indexed GEP
+  // optimizations below.
+  if (ShouldCanonicalizeSwap && Src->hasOneUse() &&
+      Src->getPointerOperandType() == GEP.getPointerOperandType() &&
+      Src->getType()->isVectorTy() == GEP.getType()->isVectorTy() &&
+      !isa<GlobalValue>(Src->getPointerOperand())) {
+    // When swapping, a GEP with all-constant indices takes priority over one
+    // where only the trailing indices are constant, because the former may
+    // still be merged with another all-constant-index GEP.
+    if ((isa<ConstantInt>(*(Src->indices().end() - 1)) &&
+         !isa<ConstantInt>(*(GEP.indices().end() - 1))) ||
+        (Src->hasAllConstantIndices() && !GEP.hasAllConstantIndices())) {
+      // Cannot guarantee inbounds after the swap because the non-constant
+      // index may be negative.
+      Value *NewSrc = Builder.CreateGEP(
+          GEP.getSourceElementType(), Src->getOperand(0),
+          SmallVector<Value *>(GEP.indices()), Src->getName());
+      GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+          Src->getSourceElementType(), NewSrc,
+          SmallVector<Value *>(Src->indices()), GEP.getName());
+      return NewGEP;
+    }
+  }
 
   // For constant GEPs, use a more general offset-based folding approach.
   // Only do this for opaque pointers, as the result element type may change.

diff  --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
index 0e8d12ba12aae..60cb7ad4bfd95 100644
--- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll
@@ -1,21 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s
+; RUN: opt < %s -passes='require<loops>,instcombine' -opaque-pointers -S | FileCheck %s
 
 ; Constant-indexed GEP instructions in a chain of GEP instructions should be
 ; swapped to the end whenever such transformation is valid. This allows them to
 ; be merged.
 
-declare void @use(i1)
-
 
 ; The constant-indexed GEP instruction should be swapped to the end, even
 ; without merging.
-; result = (((ptr) p + a) + b) + 1
+; result = (((i32*) p + a) + b) + 1
 define ptr @basic(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @basic(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    ret ptr [[TMP3]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -27,33 +25,34 @@ define ptr @basic(ptr %p, i64 %a, i64 %b) {
 ; GEP with the last index being a constant should also be swapped.
 define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @partialConstant1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
-; CHECK-NEXT:    ret ptr [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 1
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1
-  %2 = getelementptr inbounds i32, ptr %p, i64 %b
+  %2 = getelementptr inbounds i32, ptr %1, i64 %b
   ret ptr %2
 }
 
 ; Negative test. GEP should not be swapped if the last index is not a constant.
 define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @partialConstant2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]]
-; CHECK-NEXT:    ret ptr [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a
-  %2 = getelementptr inbounds i32, ptr %p, i64 %b
+  %2 = getelementptr inbounds i32, ptr %1, i64 %b
   ret ptr %2
 }
 
-; Constant-indexed GEP are merged after swawpping.
-; result = ((ptr) p + a) + 3
+; Constant-indexed GEP are merged after swapping.
+; result = ((i32*) p + a) + 3
 define ptr @merge(ptr %p, i64 %a) {
 ; CHECK-LABEL: @merge(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 3
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
   %2 = getelementptr inbounds i32, ptr %1, i64 %a
@@ -64,16 +63,14 @@ define ptr @merge(ptr %p, i64 %a) {
 ; Multiple constant-indexed GEP. Note that the first two cannot be merged at
 ; first, but after the second and third are merged, the result can be merged
 ; with the first one on the next pass.
-; result = (ptr) ((ptr) ((ptr) ptr + a) + (a * b)) + 9
+; result = (<3 x i32>*) ((i16*) ((i8*) p + a) + (a * b)) + 9
 define ptr @nested(ptr %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @nested(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <3 x i32>, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 4
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, ptr [[TMP5]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <3 x i32>, ptr [[TMP3]], i64 10
+; CHECK-NEXT:    ret ptr [[TMP4]]
 ;
   %1 = getelementptr inbounds <3 x i32>, ptr %p, i64 1
   %2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -87,9 +84,9 @@ define ptr @nested(ptr %p, i64 %a, i64 %b) {
 ; It is valid to swap if the source operand of the first GEP has multiple uses.
 define ptr @multipleUses1(ptr %p) {
 ; CHECK-LABEL: @multipleUses1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    ret ptr [[TMP3]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
@@ -98,23 +95,9 @@ define ptr @multipleUses1(ptr %p) {
   ret ptr %3
 }
 
-; It is valid to swap if the second GEP has multiple uses.
-define ptr @multipleUses2(ptr %p, i64 %a) {
-; CHECK-LABEL: @multipleUses2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    call void @use(ptr nonnull [[TMP2]])
-; CHECK-NEXT:    ret ptr [[TMP2]]
-;
-  %1 = getelementptr inbounds i32, ptr %p, i64 1
-  %2 = getelementptr inbounds i32, ptr %1, i64 %a
-  call void @use(ptr %2)
-  ret ptr %2
-}
-
 ; Negative test. It is not valid to swap if the first GEP has multiple uses.
-define ptr @multipleUses3(ptr %p) {
-; CHECK-LABEL: @multipleUses3(
+define ptr @multipleUses2(ptr %p) {
+; CHECK-LABEL: @multipleUses2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
@@ -125,3 +108,40 @@ define ptr @multipleUses3(ptr %p) {
   %3 = getelementptr inbounds i32, ptr %1, i64 %2
   ret ptr %3
 }
+
+; Negative test. LICM should take priority over canonicalization, so the first
+; GEP should not be swapped, even if it contains a constant index.
+define i64 @licm(ptr %p) {
+; CHECK-LABEL: @licm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 4
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr i64, ptr [[P1]], i64 [[I]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[P2]], align 4
+; CHECK-NEXT:    [[ADD]] = add nsw i64 [[SUM]], [[LOAD]]
+; CHECK-NEXT:    [[INEXT]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[I]], 1000000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i64 [[ADD]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %inext, %for.body ]
+  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %p1 = getelementptr i64, ptr %p, i64 4
+  %p2 = getelementptr i64, ptr %p1, i64 %i
+  %load = load i64, ptr %p2
+  %add = add nsw i64 %sum, %load
+  %inext = add nuw nsw i64 %i, 1
+  %exitcond = icmp eq i64 %i, 1000000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i64 %add
+}

diff  --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
index a7c5ff9b6ce8c..4a0aade98b2d2 100644
--- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -9,7 +9,7 @@ target datalayout = "i24:8:8"
 %struct.B = type { i8, [3 x i16], %struct.A, float }
 %struct.C = type { i8, i32, i32 }
 
-; result = (ptr) p + 3
+; result = (i32*) p + 3
 define ptr @mergeBasic(ptr %p) {
 ; CHECK-LABEL: @mergeBasic(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
@@ -20,8 +20,8 @@ define ptr @mergeBasic(ptr %p) {
   ret ptr %2
 }
 
-; Converted to ptr and merged.
-; result = (ptr) p + 10
+; Converted to i8* and merged.
+; result = (i8*) p + 10
 define ptr @mergeDifferentTypes(ptr %p) {
 ; CHECK-LABEL: @mergeDifferentTypes(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
@@ -32,8 +32,8 @@ define ptr @mergeDifferentTypes(ptr %p) {
   ret ptr %2
 }
 
-; Converted to ptr and merged.
-; result = (ptr) p + 10
+; Converted to i8* and merged.
+; result = (i8*) p + 10
 define ptr @mergeReverse(ptr %p) {
 ; CHECK-LABEL: @mergeReverse(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
@@ -55,7 +55,7 @@ define ptr @zeroSum(ptr %p) {
   ret ptr %2
 }
 
-; result = (ptr) ((ptr) p + 1) + 17
+; result = (i8*) (([20 x i8]*) p + 1) + 17
 define ptr @array1(ptr %p) {
 ; CHECK-LABEL: @array1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17
@@ -66,8 +66,8 @@ define ptr @array1(ptr %p) {
   ret ptr %2
 }
 
-; Converted to ptr and merged.
-; result = (ptr) p + 20
+; Converted to i8* and merged.
+; result = (i8*) p + 20
 define ptr @array2(ptr %p) {
 ; CHECK-LABEL: @array2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 20
@@ -78,8 +78,8 @@ define ptr @array2(ptr %p) {
   ret ptr %2
 }
 
-; Converted to ptr and merged.
-; result = (ptr) p + 36
+; Converted to i8* and merged.
+; result = (i8*) p + 36
 define ptr @struct1(ptr %p) {
 ; CHECK-LABEL: @struct1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 36
@@ -101,7 +101,7 @@ define ptr @struct2(ptr %p) {
   ret ptr %2
 }
 
-; result = (ptr) &((struct.B) p)[0].member2.member0 + 7
+; result = (i8*) &((struct.B) p)[0].member2.member0 + 7
 define ptr @structStruct(ptr %p) {
 ; CHECK-LABEL: @structStruct(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7
@@ -115,7 +115,7 @@ define ptr @structStruct(ptr %p) {
 ; First GEP offset is not divisible by last GEP's source element size, but first
 ; GEP points to an array such that the last GEP offset is divisible by the
 ; array's element size, so the first GEP can be rewritten with an extra index.
-; result = (ptr) &((struct.B*) p)[i].member1 + 2
+; result = (i16*) &((struct.B*) p)[i].member1 + 2
 define ptr @appendIndex(ptr %p, i64 %i) {
 ; CHECK-LABEL: @appendIndex(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 1, i64 2
@@ -126,10 +126,23 @@ define ptr @appendIndex(ptr %p, i64 %i) {
   ret ptr %2
 }
 
-; Offset of either GEP is not divisible by the other's size, converted to ptr
+; After canonicalizing, the second GEP is moved to the front, and then merged
+; with the first one with rewritten indices.
+; result = (i8*) &((struct.A*) &((struct.B*) p)[i].member2).member0 + 2
+define ptr @appendIndexReverse(ptr %p, i64 %i) {
+; CHECK-LABEL: @appendIndexReverse(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 [[I:%.*]], i32 2, i32 0, i64 2
+; CHECK-NEXT:    ret ptr [[TMP1]]
+;
+  %1 = getelementptr inbounds i64, ptr %p, i64 1
+  %2 = getelementptr inbounds %struct.B, ptr %1, i64 %i, i32 1
+  ret ptr %2
+}
+
+; Offset of either GEP is not divisible by the other's size, converted to i8*
 ; and merged.
 ; Here i24 is 8-bit aligned.
-; result = (ptr) p + 7
+; result = (i8*) p + 7
 define ptr @notDivisible(ptr %p) {
 ; CHECK-LABEL: @notDivisible(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 7
@@ -144,8 +157,8 @@ define ptr @notDivisible(ptr %p) {
 ; or divisible by the other's size.
 define ptr @partialConstant2(ptr %p, i64 %a) {
 ; CHECK-LABEL: @partialConstant2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [4 x i64], ptr [[P:%.*]], i64 [[A:%.*]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1
 ; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1

diff  --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 737a9c592d1e6..64e98b88f7cc5 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -1723,10 +1723,10 @@ define void @ashr_out_of_range(ptr %A) {
 define void @ashr_out_of_range_1(ptr %A) {
 ; CHECK-LABEL: @ashr_out_of_range_1(
 ; CHECK-NEXT:    [[L:%.*]] = load i177, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[G11:%.*]] = getelementptr i177, ptr [[A]], i64 -1
 ; CHECK-NEXT:    [[B24_LOBIT:%.*]] = ashr i177 [[L]], 175
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i177 [[B24_LOBIT]] to i64
-; CHECK-NEXT:    [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP1]]
+; CHECK-NEXT:    [[G111:%.*]] = getelementptr i177, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[G62:%.*]] = getelementptr i177, ptr [[G111]], i64 -1
 ; CHECK-NEXT:    store i177 0, ptr [[G62]], align 4
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
index 94ccfec39e22e..b02fd162fdeee 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -38,8 +38,7 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond,
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
 ; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -50,8 +49,7 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond,
 ; CHECK-NEXT:    [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, double* [[TMP10]], i64 -7
 ; CHECK-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)

diff  --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index aa0a253a9a113..d509f01611043 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -762,11 +762,9 @@ define void @mixed_load3_store3(i32* nocapture %A) {
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>*
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>

diff  --git a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
index 3975a0dd33748..127139d24fac4 100644
--- a/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll
@@ -11,14 +11,14 @@ define i16 @helper(i16 %0, i64 %x) {
 ; CHECK-NEXT:  start:
 ; CHECK-NEXT:    [[DATA:%.*]] = alloca [2 x i8], align 2
 ; CHECK-NEXT:    store i16 [[TMP0:%.*]], ptr [[DATA]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DATA]], i64 1
 ; CHECK-NEXT:    br label [[BB6_I_I:%.*]]
 ; CHECK:       bb6.i.i:
 ; CHECK-NEXT:    [[ITER_SROA_0_07_I_I:%.*]] = phi i64 [ [[TMP2:%.*]], [[BB6_I_I]] ], [ 0, [[START:%.*]] ]
 ; CHECK-NEXT:    [[_40_I_I:%.*]] = sub nsw i64 0, [[ITER_SROA_0_07_I_I]]
 ; CHECK-NEXT:    [[TMP2]] = add nuw nsw i64 [[ITER_SROA_0_07_I_I]], 1
 ; CHECK-NEXT:    [[_34_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[DATA]], i64 0, i64 [[ITER_SROA_0_07_I_I]]
-; CHECK-NEXT:    [[_39_I_I:%.*]] = getelementptr inbounds [0 x i8], ptr [[TMP1]], i64 0, i64 [[_40_I_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [0 x i8], ptr [[DATA]], i64 0, i64 [[_40_I_I]]
+; CHECK-NEXT:    [[_39_I_I:%.*]] = getelementptr i8, ptr [[TMP1]], i64 1
 ; CHECK-NEXT:    [[TMP_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_34_I_I]], align 1
 ; CHECK-NEXT:    [[TMP2_0_COPYLOAD_I_I_I_I:%.*]] = load i8, ptr [[_39_I_I]], align 1
 ; CHECK-NEXT:    store i8 [[TMP2_0_COPYLOAD_I_I_I_I]], ptr [[_34_I_I]], align 1


        

