[llvm-branch-commits] [llvm] f636baf - Revert "[LSR] Narrow search space by merging users outside and inside loop (#…"

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jun 4 06:09:43 PDT 2026


Author: John Brawn
Date: 2026-06-04T14:09:38+01:00
New Revision: f636bafef63565a6a3d697a7c4f399250a337994

URL: https://github.com/llvm/llvm-project/commit/f636bafef63565a6a3d697a7c4f399250a337994
DIFF: https://github.com/llvm/llvm-project/commit/f636bafef63565a6a3d697a7c4f399250a337994.diff

LOG: Revert "[LSR] Narrow search space by merging users outside and inside loop (#…"

This reverts commit 37f8a85dbcb70cd87471a4bf388d0d85fa105105.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Removed: 
    llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1b72f2d2e8a25..444372ab2ddfc 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2295,7 +2295,6 @@ class LSRInstance {
   void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
   void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
   void NarrowSearchSpaceByFilterPostInc();
-  void NarrowSearchSpaceByMergingUsesOutsideLoop();
   void NarrowSearchSpaceByDeletingCostlyFormulas();
   void NarrowSearchSpaceByPickingWinnerRegs();
   void NarrowSearchSpaceUsingHeuristics();
@@ -5221,71 +5220,6 @@ void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
   LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
 }
 
-void LSRInstance::NarrowSearchSpaceByMergingUsesOutsideLoop() {
-  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
-    return;
-
-  LLVM_DEBUG(
-      dbgs() << "The search space is too complex.\n"
-                "Narrowing the search space by merging uses with fixups "
-                "entirely outside the loop with uses inside the loop.\n");
-
-  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
-    LSRUse &LU = Uses[LUIdx];
-    if (!LU.AllFixupsOutsideLoop || LU.Formulae.empty())
-      continue;
-
-    LLVM_DEBUG(dbgs() << "  Trying to eliminate use "; LU.print(dbgs());
-               dbgs() << '\n');
-
-    // Find a compatible LSRUse inside the loop that we could merge LU with
-    LSRUse *LUToMergeWith = nullptr;
-    for (LSRUse &OtherLU : Uses) {
-      // Only merge with uses inside the loop
-      if (OtherLU.AllFixupsOutsideLoop)
-        continue;
-      // Can't merge with ICmpZero uses as they're handled specially when
-      // expanding
-      if (OtherLU.Kind == LSRUse::ICmpZero)
-        continue;
-      // Can't merge with uses without any formulae
-      if (OtherLU.Formulae.empty())
-        continue;
-      // We can merge with uses that have the same initial formula. We allow
-      // merging of uses with different Kind and AccessTy which means that the
-      // cost may end up being inaccurate, but it's also what we would have
-      // gotten if we'd ignored uses outside the loop entirely.
-      const Formula &ThisF = LU.Formulae[0];
-      const Formula &OtherF = OtherLU.Formulae[0];
-      if (ThisF.BaseRegs == OtherF.BaseRegs &&
-          ThisF.ScaledReg == OtherF.ScaledReg &&
-          ThisF.BaseGV == OtherF.BaseGV && ThisF.Scale == OtherF.Scale &&
-          ThisF.UnfoldedOffset == OtherF.UnfoldedOffset &&
-          ThisF.BaseOffset == OtherF.BaseOffset) {
-        LUToMergeWith = &OtherLU;
-        break;
-      }
-    }
-    if (!LUToMergeWith)
-      continue;
-
-    LLVM_DEBUG(dbgs() << "   Merging with "; LUToMergeWith->print(dbgs());
-               dbgs() << '\n');
-
-    // Copy fixups
-    for (LSRFixup &Fixup : LU.Fixups) {
-      LUToMergeWith->pushFixup(Fixup);
-    }
-
-    // Delete the old use.
-    DeleteUse(LU, LUIdx);
-    --LUIdx;
-    --NumUses;
-  }
-
-  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
-}
-
 /// The function delete formulas with high registers number expectation.
 /// Assuming we don't know the value of each formula (already delete
 /// all inefficient), generate probability of not selecting for each
@@ -5536,7 +5470,6 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
   if (FilterSameScaledReg)
     NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
   NarrowSearchSpaceByFilterPostInc();
-  NarrowSearchSpaceByMergingUsesOutsideLoop();
   if (LSRExpNarrow)
     NarrowSearchSpaceByDeletingCostlyFormulas();
   else

diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll
deleted file mode 100644
index 02ba9f23a4222..0000000000000
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll
+++ /dev/null
@@ -1,629 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -S -mtriple=aarch64-none-elf -loop-reduce < %s | FileCheck %s
-
-; These tests check that the presence of uses of a pointer outside a loop don't
-; cause a different transformation inside the loop.
-
-define i32 @postinc_inloop_no_outsideloop(ptr %p, i64 %n) {
-; CHECK-LABEL: define i32 @postinc_inloop_no_outsideloop(
-; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1
-; CHECK-NEXT:    [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2
-; CHECK-NEXT:    [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3
-; CHECK-NEXT:    [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[P0_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P0_LOAD]], i64 32
-; CHECK-NEXT:    [[P1_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P1_LOAD]], i64 32
-; CHECK-NEXT:    [[P2_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P2_LOAD]], i64 32
-; CHECK-NEXT:    [[P3_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P3_LOAD]], i64 32
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P3:%.*]] = phi ptr [ [[P3_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P3_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P2_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P2_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi ptr [ [[P1_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P1_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P0:%.*]] = phi ptr [ [[P0_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P0_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[RET_4:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[P0_NEXT]] = getelementptr inbounds nuw i32, ptr [[P0]], i64 4
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, ptr [[P0]], align 4
-; CHECK-NEXT:    [[RET_1:%.*]] = add nsw i32 [[VAL0]], [[RET_0]]
-; CHECK-NEXT:    [[P1_NEXT]] = getelementptr inbounds nuw i32, ptr [[P1]], i64 4
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[P1]], align 4
-; CHECK-NEXT:    [[RET_2:%.*]] = add nsw i32 [[VAL1]], [[RET_1]]
-; CHECK-NEXT:    [[P2_NEXT]] = getelementptr inbounds nuw i32, ptr [[P2]], i64 4
-; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[P2]], align 4
-; CHECK-NEXT:    [[RET_3:%.*]] = add nsw i32 [[VAL2]], [[RET_2]]
-; CHECK-NEXT:    [[P3_NEXT]] = getelementptr inbounds nuw i32, ptr [[P3]], i64 4
-; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[P3]], align 4
-; CHECK-NEXT:    [[RET_4]] = add nsw i32 [[VAL3]], [[RET_3]]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret i32 [[RET_4]]
-;
-entry:
-  %p0.load = load ptr, ptr %p, align 8
-  %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1
-  %p1.load = load ptr, ptr %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2
-  %p2.load = load ptr, ptr %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3
-  %p3.load = load ptr, ptr %arrayidx3, align 8
-  %p0.start = getelementptr inbounds nuw i32, ptr %p0.load, i64 32
-  %p1.start = getelementptr inbounds nuw i32, ptr %p1.load, i64 32
-  %p2.start = getelementptr inbounds nuw i32, ptr %p2.load, i64 32
-  %p3.start = getelementptr inbounds nuw i32, ptr %p3.load, i64 32
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %p3 = phi ptr [ %p3.next, %for.body ], [ %p3.start, %entry ]
-  %p2 = phi ptr [ %p2.next, %for.body ], [ %p2.start, %entry ]
-  %p1 = phi ptr [ %p1.next, %for.body ], [ %p1.start, %entry ]
-  %p0 = phi ptr [ %p0.next, %for.body ], [ %p0.start, %entry ]
-  %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ]
-  %p0.next = getelementptr inbounds nuw i32, ptr %p0, i64 4
-  %val0 = load i32, ptr %p0, align 4
-  %ret.1 = add nsw i32 %val0, %ret.0
-  %p1.next = getelementptr inbounds nuw i32, ptr %p1, i64 4
-  %val1 = load i32, ptr %p1, align 4
-  %ret.2 = add nsw i32 %val1, %ret.1
-  %p2.next = getelementptr inbounds nuw i32, ptr %p2, i64 4
-  %val2 = load i32, ptr %p2, align 4
-  %ret.3 = add nsw i32 %val2, %ret.2
-  %p3.next = getelementptr inbounds nuw i32, ptr %p3, i64 4
-  %val3 = load i32, ptr %p3, align 4
-  %ret.4 = add nsw i32 %val3, %ret.3
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, %n
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  ret i32 %ret.4
-}
-
-define i32 @postinc_inloop_postinc_outsideloop(ptr %p, i64 %n) {
-; CHECK-LABEL: define i32 @postinc_inloop_postinc_outsideloop(
-; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1
-; CHECK-NEXT:    [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2
-; CHECK-NEXT:    [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3
-; CHECK-NEXT:    [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[P0_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P0_LOAD]], i64 32
-; CHECK-NEXT:    [[P1_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P1_LOAD]], i64 32
-; CHECK-NEXT:    [[P2_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P2_LOAD]], i64 32
-; CHECK-NEXT:    [[P3_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P3_LOAD]], i64 32
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P3:%.*]] = phi ptr [ [[P3_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P3_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P2_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P2_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi ptr [ [[P1_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P1_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P0:%.*]] = phi ptr [ [[P0_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P0_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[RET_4:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[P0_NEXT]] = getelementptr inbounds nuw i32, ptr [[P0]], i64 4
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, ptr [[P0]], align 4
-; CHECK-NEXT:    [[RET_1:%.*]] = add nsw i32 [[VAL0]], [[RET_0]]
-; CHECK-NEXT:    [[P1_NEXT]] = getelementptr inbounds nuw i32, ptr [[P1]], i64 4
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[P1]], align 4
-; CHECK-NEXT:    [[RET_2:%.*]] = add nsw i32 [[VAL1]], [[RET_1]]
-; CHECK-NEXT:    [[P2_NEXT]] = getelementptr inbounds nuw i32, ptr [[P2]], i64 4
-; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[P2]], align 4
-; CHECK-NEXT:    [[RET_3:%.*]] = add nsw i32 [[VAL2]], [[RET_2]]
-; CHECK-NEXT:    [[P3_NEXT]] = getelementptr inbounds nuw i32, ptr [[P3]], i64 4
-; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[P3]], align 4
-; CHECK-NEXT:    [[RET_4]] = add nsw i32 [[VAL3]], [[RET_3]]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    store ptr [[P0_NEXT]], ptr [[P]], align 8
-; CHECK-NEXT:    store ptr [[P1_NEXT]], ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    store ptr [[P2_NEXT]], ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    store ptr [[P3_NEXT]], ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    ret i32 [[RET_4]]
-;
-entry:
-  %p0.load = load ptr, ptr %p, align 8
-  %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1
-  %p1.load = load ptr, ptr %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2
-  %p2.load = load ptr, ptr %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3
-  %p3.load = load ptr, ptr %arrayidx3, align 8
-  %p0.start = getelementptr inbounds nuw i32, ptr %p0.load, i64 32
-  %p1.start = getelementptr inbounds nuw i32, ptr %p1.load, i64 32
-  %p2.start = getelementptr inbounds nuw i32, ptr %p2.load, i64 32
-  %p3.start = getelementptr inbounds nuw i32, ptr %p3.load, i64 32
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %p3 = phi ptr [ %p3.next, %for.body ], [ %p3.start, %entry ]
-  %p2 = phi ptr [ %p2.next, %for.body ], [ %p2.start, %entry ]
-  %p1 = phi ptr [ %p1.next, %for.body ], [ %p1.start, %entry ]
-  %p0 = phi ptr [ %p0.next, %for.body ], [ %p0.start, %entry ]
-  %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ]
-  %p0.next = getelementptr inbounds nuw i32, ptr %p0, i64 4
-  %val0 = load i32, ptr %p0, align 4
-  %ret.1 = add nsw i32 %val0, %ret.0
-  %p1.next = getelementptr inbounds nuw i32, ptr %p1, i64 4
-  %val1 = load i32, ptr %p1, align 4
-  %ret.2 = add nsw i32 %val1, %ret.1
-  %p2.next = getelementptr inbounds nuw i32, ptr %p2, i64 4
-  %val2 = load i32, ptr %p2, align 4
-  %ret.3 = add nsw i32 %val2, %ret.2
-  %p3.next = getelementptr inbounds nuw i32, ptr %p3, i64 4
-  %val3 = load i32, ptr %p3, align 4
-  %ret.4 = add nsw i32 %val3, %ret.3
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, %n
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  store ptr %p0.next, ptr %p, align 8
-  store ptr %p1.next, ptr %arrayidx1, align 8
-  store ptr %p2.next, ptr %arrayidx2, align 8
-  store ptr %p3.next, ptr %arrayidx3, align 8
-  ret i32 %ret.4
-}
-
-define i32 @offset_inloop_no_outsideloop(ptr %p, i64 %n) {
-; CHECK-LABEL: define i32 @offset_inloop_no_outsideloop(
-; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1
-; CHECK-NEXT:    [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2
-; CHECK-NEXT:    [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3
-; CHECK-NEXT:    [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[P3_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr nuw i8, ptr [[P2_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr nuw i8, ptr [[P1_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP9:%.*]] = getelementptr nuw i8, ptr [[P0_LOAD]], i64 128
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[LSR_IV10:%.*]] = phi ptr [ [[SCEVGEP11:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP9]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP6]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP3]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[RET_4:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, ptr [[LSR_IV10]], align 4
-; CHECK-NEXT:    [[RET_1:%.*]] = add nsw i32 [[VAL0]], [[RET_0]]
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[LSR_IV7]], align 4
-; CHECK-NEXT:    [[RET_2:%.*]] = add nsw i32 [[VAL1]], [[RET_1]]
-; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[LSR_IV4]], align 4
-; CHECK-NEXT:    [[RET_3:%.*]] = add nsw i32 [[VAL2]], [[RET_2]]
-; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT:    [[RET_4]] = add nsw i32 [[VAL3]], [[RET_3]]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4
-; CHECK-NEXT:    [[SCEVGEP8]] = getelementptr i8, ptr [[LSR_IV7]], i64 4
-; CHECK-NEXT:    [[SCEVGEP11]] = getelementptr i8, ptr [[LSR_IV10]], i64 4
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    ret i32 [[RET_4]]
-;
-entry:
-  %p0.load = load ptr, ptr %p, align 8
-  %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1
-  %p1.load = load ptr, ptr %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2
-  %p2.load = load ptr, ptr %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3
-  %p3.load = load ptr, ptr %arrayidx3, align 8
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %off = phi i64 [ %off.next, %for.body ], [ 32, %entry ]
-  %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ]
-  %p0 = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off
-  %val0 = load i32, ptr %p0, align 4
-  %ret.1 = add nsw i32 %val0, %ret.0
-  %p1 = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off
-  %val1 = load i32, ptr %p1, align 4
-  %ret.2 = add nsw i32 %val1, %ret.1
-  %p2 = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off
-  %val2 = load i32, ptr %p2, align 4
-  %ret.3 = add nsw i32 %val2, %ret.2
-  %p3 = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off
-  %val3 = load i32, ptr %p3, align 4
-  %ret.4 = add nsw i32 %val3, %ret.3
-  %iv.next = add nuw nsw i64 %iv, 1
-  %off.next = add nuw nsw i64 %off, 1
-  %exitcond = icmp eq i64 %iv.next, %n
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  ret i32 %ret.4
-}
-
-define i32 @offset_inloop_offset_outsideloop(ptr %p, i64 %n) {
-; CHECK-LABEL: define i32 @offset_inloop_offset_outsideloop(
-; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1
-; CHECK-NEXT:    [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2
-; CHECK-NEXT:    [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3
-; CHECK-NEXT:    [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[SCEVGEP16:%.*]] = getelementptr nuw i8, ptr [[P0_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr nuw i8, ptr [[P1_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr nuw i8, ptr [[P2_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP9:%.*]] = getelementptr nuw i8, ptr [[P3_LOAD]], i64 128
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[LSR_IV10:%.*]] = phi ptr [ [[SCEVGEP11:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP9]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP6]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP3]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP16]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[RET_4:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT:    [[RET_1:%.*]] = add nsw i32 [[VAL0]], [[RET_0]]
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[LSR_IV4]], align 4
-; CHECK-NEXT:    [[RET_2:%.*]] = add nsw i32 [[VAL1]], [[RET_1]]
-; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[LSR_IV7]], align 4
-; CHECK-NEXT:    [[RET_3:%.*]] = add nsw i32 [[VAL2]], [[RET_2]]
-; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[LSR_IV10]], align 4
-; CHECK-NEXT:    [[RET_4]] = add nsw i32 [[VAL3]], [[RET_3]]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4
-; CHECK-NEXT:    [[SCEVGEP8]] = getelementptr i8, ptr [[LSR_IV7]], i64 4
-; CHECK-NEXT:    [[SCEVGEP11]] = getelementptr i8, ptr [[LSR_IV10]], i64 4
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    store ptr [[SCEVGEP2]], ptr [[P]], align 8
-; CHECK-NEXT:    store ptr [[SCEVGEP5]], ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    store ptr [[SCEVGEP8]], ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    store ptr [[SCEVGEP11]], ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    ret i32 [[RET_4]]
-;
-entry:
-  %p0.load = load ptr, ptr %p, align 8
-  %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1
-  %p1.load = load ptr, ptr %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2
-  %p2.load = load ptr, ptr %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3
-  %p3.load = load ptr, ptr %arrayidx3, align 8
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %off = phi i64 [ %off.next, %for.body ], [ 32, %entry ]
-  %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ]
-  %p0 = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off
-  %val0 = load i32, ptr %p0, align 4
-  %ret.1 = add nsw i32 %val0, %ret.0
-  %p1 = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off
-  %val1 = load i32, ptr %p1, align 4
-  %ret.2 = add nsw i32 %val1, %ret.1
-  %p2 = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off
-  %val2 = load i32, ptr %p2, align 4
-  %ret.3 = add nsw i32 %val2, %ret.2
-  %p3 = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off
-  %val3 = load i32, ptr %p3, align 4
-  %ret.4 = add nsw i32 %val3, %ret.3
-  %iv.next = add nuw nsw i64 %iv, 1
-  %off.next = add nuw nsw i64 %off, 1
-  %exitcond = icmp eq i64 %iv.next, %n
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  %p0.last = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off.next
-  store ptr %p0.last, ptr %p, align 8
-  %p1.last = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off.next
-  store ptr %p1.last, ptr %arrayidx1, align 8
-  %p2.last = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off.next
-  store ptr %p2.last, ptr %arrayidx2, align 8
-  %p3.last = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off.next
-  store ptr %p3.last, ptr %arrayidx3, align 8
-  ret i32 %ret.4
-}
-
-define i32 @postinc_inloop_offset_outsideloop(ptr %p, i64 %n) {
-; CHECK-LABEL: define i32 @postinc_inloop_offset_outsideloop(
-; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1
-; CHECK-NEXT:    [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2
-; CHECK-NEXT:    [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3
-; CHECK-NEXT:    [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[P0_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P0_LOAD]], i64 32
-; CHECK-NEXT:    [[P1_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P1_LOAD]], i64 32
-; CHECK-NEXT:    [[P2_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P2_LOAD]], i64 32
-; CHECK-NEXT:    [[P3_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P3_LOAD]], i64 32
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P3:%.*]] = phi ptr [ [[P3_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P3_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P2:%.*]] = phi ptr [ [[P2_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P2_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi ptr [ [[P1_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P1_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P0:%.*]] = phi ptr [ [[P0_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P0_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[RET_4:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[P0_NEXT]] = getelementptr inbounds nuw i32, ptr [[P0]], i64 4
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, ptr [[P0]], align 4
-; CHECK-NEXT:    [[RET_1:%.*]] = add nsw i32 [[VAL0]], [[RET_0]]
-; CHECK-NEXT:    [[P1_NEXT]] = getelementptr inbounds nuw i32, ptr [[P1]], i64 4
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[P1]], align 4
-; CHECK-NEXT:    [[RET_2:%.*]] = add nsw i32 [[VAL1]], [[RET_1]]
-; CHECK-NEXT:    [[P2_NEXT]] = getelementptr inbounds nuw i32, ptr [[P2]], i64 4
-; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[P2]], align 4
-; CHECK-NEXT:    [[RET_3:%.*]] = add nsw i32 [[VAL2]], [[RET_2]]
-; CHECK-NEXT:    [[P3_NEXT]] = getelementptr inbounds nuw i32, ptr [[P3]], i64 4
-; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[P3]], align 4
-; CHECK-NEXT:    [[RET_4]] = add nsw i32 [[VAL3]], [[RET_3]]
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    store ptr [[P0_NEXT]], ptr [[P]], align 8
-; CHECK-NEXT:    store ptr [[P1_NEXT]], ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    store ptr [[P2_NEXT]], ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    store ptr [[P3_NEXT]], ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    ret i32 [[RET_4]]
-;
-entry:
-  %p0.load = load ptr, ptr %p, align 8
-  %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1
-  %p1.load = load ptr, ptr %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2
-  %p2.load = load ptr, ptr %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3
-  %p3.load = load ptr, ptr %arrayidx3, align 8
-  %p0.start = getelementptr inbounds nuw i32, ptr %p0.load, i64 32
-  %p1.start = getelementptr inbounds nuw i32, ptr %p1.load, i64 32
-  %p2.start = getelementptr inbounds nuw i32, ptr %p2.load, i64 32
-  %p3.start = getelementptr inbounds nuw i32, ptr %p3.load, i64 32
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %p3 = phi ptr [ %p3.next, %for.body ], [ %p3.start, %entry ]
-  %p2 = phi ptr [ %p2.next, %for.body ], [ %p2.start, %entry ]
-  %p1 = phi ptr [ %p1.next, %for.body ], [ %p1.start, %entry ]
-  %p0 = phi ptr [ %p0.next, %for.body ], [ %p0.start, %entry ]
-  %off = phi i64 [ %off.next, %for.body ], [ 32, %entry ]
-  %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ]
-  %p0.next = getelementptr inbounds nuw i32, ptr %p0, i64 4
-  %val0 = load i32, ptr %p0, align 4
-  %ret.1 = add nsw i32 %val0, %ret.0
-  %p1.next = getelementptr inbounds nuw i32, ptr %p1, i64 4
-  %val1 = load i32, ptr %p1, align 4
-  %ret.2 = add nsw i32 %val1, %ret.1
-  %p2.next = getelementptr inbounds nuw i32, ptr %p2, i64 4
-  %val2 = load i32, ptr %p2, align 4
-  %ret.3 = add nsw i32 %val2, %ret.2
-  %p3.next = getelementptr inbounds nuw i32, ptr %p3, i64 4
-  %val3 = load i32, ptr %p3, align 4
-  %ret.4 = add nsw i32 %val3, %ret.3
-  %iv.next = add nuw nsw i64 %iv, 1
-  %off.next = add nuw nsw i64 %off, 4
-  %exitcond = icmp eq i64 %iv.next, %n
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  %p0.last = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off.next
-  store ptr %p0.last, ptr %p, align 8
-  %p1.last = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off.next
-  store ptr %p1.last, ptr %arrayidx1, align 8
-  %p2.last = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off.next
-  store ptr %p2.last, ptr %arrayidx2, align 8
-  %p3.last = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off.next
-  store ptr %p3.last, ptr %arrayidx3, align 8
-  ret i32 %ret.4
-}
-
-define i32 @offset_inloop_postinc_outsideloop(ptr %p, i64 %n) {
-; CHECK-LABEL: define i32 @offset_inloop_postinc_outsideloop(
-; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1
-; CHECK-NEXT:    [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2
-; CHECK-NEXT:    [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3
-; CHECK-NEXT:    [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[P0_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P0_LOAD]], i64 32
-; CHECK-NEXT:    [[P1_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P1_LOAD]], i64 32
-; CHECK-NEXT:    [[P2_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P2_LOAD]], i64 32
-; CHECK-NEXT:    [[P3_START:%.*]] = getelementptr inbounds nuw i32, ptr [[P3_LOAD]], i64 32
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P3_LOOP:%.*]] = phi ptr [ [[P3_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P3_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P2_LOOP:%.*]] = phi ptr [ [[P2_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P2_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P1_LOOP:%.*]] = phi ptr [ [[P1_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P1_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[P0_LOOP:%.*]] = phi ptr [ [[P0_NEXT:%.*]], %[[FOR_BODY]] ], [ [[P0_START]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[RET_4:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, ptr [[P0_LOOP]], align 4
-; CHECK-NEXT:    [[RET_1:%.*]] = add nsw i32 [[VAL0]], [[RET_0]]
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[P1_LOOP]], align 4
-; CHECK-NEXT:    [[RET_2:%.*]] = add nsw i32 [[VAL1]], [[RET_1]]
-; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[P2_LOOP]], align 4
-; CHECK-NEXT:    [[RET_3:%.*]] = add nsw i32 [[VAL2]], [[RET_2]]
-; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[P3_LOOP]], align 4
-; CHECK-NEXT:    [[RET_4]] = add nsw i32 [[VAL3]], [[RET_3]]
-; CHECK-NEXT:    [[P0_NEXT]] = getelementptr inbounds nuw i32, ptr [[P0_LOOP]], i64 4
-; CHECK-NEXT:    [[P1_NEXT]] = getelementptr inbounds nuw i32, ptr [[P1_LOOP]], i64 4
-; CHECK-NEXT:    [[P2_NEXT]] = getelementptr inbounds nuw i32, ptr [[P2_LOOP]], i64 4
-; CHECK-NEXT:    [[P3_NEXT]] = getelementptr inbounds nuw i32, ptr [[P3_LOOP]], i64 4
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    store ptr [[P0_NEXT]], ptr [[P]], align 8
-; CHECK-NEXT:    store ptr [[P1_NEXT]], ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    store ptr [[P2_NEXT]], ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    store ptr [[P3_NEXT]], ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    ret i32 [[RET_4]]
-;
-entry:
-  %p0.load = load ptr, ptr %p, align 8
-  %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1
-  %p1.load = load ptr, ptr %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2
-  %p2.load = load ptr, ptr %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3
-  %p3.load = load ptr, ptr %arrayidx3, align 8
-  %p0.start = getelementptr inbounds nuw i32, ptr %p0.load, i64 32
-  %p1.start = getelementptr inbounds nuw i32, ptr %p1.load, i64 32
-  %p2.start = getelementptr inbounds nuw i32, ptr %p2.load, i64 32
-  %p3.start = getelementptr inbounds nuw i32, ptr %p3.load, i64 32
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %p3.loop = phi ptr [ %p3.next, %for.body ], [ %p3.start, %entry ]
-  %p2.loop = phi ptr [ %p2.next, %for.body ], [ %p2.start, %entry ]
-  %p1.loop = phi ptr [ %p1.next, %for.body ], [ %p1.start, %entry ]
-  %p0.loop = phi ptr [ %p0.next, %for.body ], [ %p0.start, %entry ]
-  %off = phi i64 [ %off.next, %for.body ], [ 32, %entry ]
-  %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ]
-  %p0 = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off
-  %val0 = load i32, ptr %p0, align 4
-  %ret.1 = add nsw i32 %val0, %ret.0
-  %p1 = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off
-  %val1 = load i32, ptr %p1, align 4
-  %ret.2 = add nsw i32 %val1, %ret.1
-  %p2 = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off
-  %val2 = load i32, ptr %p2, align 4
-  %ret.3 = add nsw i32 %val2, %ret.2
-  %p3 = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off
-  %val3 = load i32, ptr %p3, align 4
-  %ret.4 = add nsw i32 %val3, %ret.3
-  %p0.next = getelementptr inbounds nuw i32, ptr %p0.loop, i64 4
-  %p1.next = getelementptr inbounds nuw i32, ptr %p1.loop, i64 4
-  %p2.next = getelementptr inbounds nuw i32, ptr %p2.loop, i64 4
-  %p3.next = getelementptr inbounds nuw i32, ptr %p3.loop, i64 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %off.next = add nuw nsw i64 %off, 4
-  %exitcond = icmp eq i64 %iv.next, %n
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  store ptr %p0.next, ptr %p, align 8
-  store ptr %p1.next, ptr %arrayidx1, align 8
-  store ptr %p2.next, ptr %arrayidx2, align 8
-  store ptr %p3.next, ptr %arrayidx3, align 8
-  ret i32 %ret.4
-}
-
-; iv_sub.next has the same formula as exitcond, but we shouldn't merge them as
-; it would cause an assertion failure later
-define i32 @icmpzero_merging(ptr %p, i64 %n) {
-; CHECK-LABEL: define i32 @icmpzero_merging(
-; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8
-; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1
-; CHECK-NEXT:    [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2
-; CHECK-NEXT:    [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3
-; CHECK-NEXT:    [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[SCEVGEP14:%.*]] = getelementptr nuw i8, ptr [[P0_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr nuw i8, ptr [[P1_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr nuw i8, ptr [[P2_LOAD]], i64 128
-; CHECK-NEXT:    [[SCEVGEP8:%.*]] = getelementptr nuw i8, ptr [[P3_LOAD]], i64 128
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[LSR_IV9:%.*]] = phi ptr [ [[SCEVGEP10:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP8]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV6:%.*]] = phi ptr [ [[SCEVGEP7:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP5]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV3:%.*]] = phi ptr [ [[SCEVGEP4:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP2]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP1:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP14]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[IV_SUB:%.*]] = phi i64 [ [[IV_SUB_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[RET_4:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[VAL0:%.*]] = load i32, ptr [[LSR_IV]], align 4
-; CHECK-NEXT:    [[RET_1:%.*]] = add nsw i32 [[VAL0]], [[RET_0]]
-; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr [[LSR_IV3]], align 4
-; CHECK-NEXT:    [[RET_2:%.*]] = add nsw i32 [[VAL1]], [[RET_1]]
-; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[LSR_IV6]], align 4
-; CHECK-NEXT:    [[RET_3:%.*]] = add nsw i32 [[VAL2]], [[RET_2]]
-; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[LSR_IV9]], align 4
-; CHECK-NEXT:    [[RET_4]] = add nsw i32 [[VAL3]], [[RET_3]]
-; CHECK-NEXT:    [[IV_SUB_NEXT]] = add i64 [[IV_SUB]], -1
-; CHECK-NEXT:    [[SCEVGEP1]] = getelementptr i8, ptr [[LSR_IV]], i64 4
-; CHECK-NEXT:    [[SCEVGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 4
-; CHECK-NEXT:    [[SCEVGEP7]] = getelementptr i8, ptr [[LSR_IV6]], i64 4
-; CHECK-NEXT:    [[SCEVGEP10]] = getelementptr i8, ptr [[LSR_IV9]], i64 4
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_SUB_NEXT]], 0
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
-; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    store ptr [[SCEVGEP1]], ptr [[P]], align 8
-; CHECK-NEXT:    store ptr [[SCEVGEP4]], ptr [[ARRAYIDX1]], align 8
-; CHECK-NEXT:    store ptr [[SCEVGEP7]], ptr [[ARRAYIDX2]], align 8
-; CHECK-NEXT:    store ptr [[SCEVGEP10]], ptr [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i64, ptr [[P]], i64 4
-; CHECK-NEXT:    store i64 [[IV_SUB_NEXT]], ptr [[ARRAYIDX4]], align 8
-; CHECK-NEXT:    ret i32 [[RET_4]]
-;
-entry:
-  %p0.load = load ptr, ptr %p, align 8
-  %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1
-  %p1.load = load ptr, ptr %arrayidx1, align 8
-  %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2
-  %p2.load = load ptr, ptr %arrayidx2, align 8
-  %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3
-  %p3.load = load ptr, ptr %arrayidx3, align 8
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
-  %iv_sub = phi i64 [ %iv_sub.next, %for.body], [ %n, %entry ]
-  %off = phi i64 [ %off.next, %for.body ], [ 32, %entry ]
-  %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ]
-  %p0 = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off
-  %val0 = load i32, ptr %p0, align 4
-  %ret.1 = add nsw i32 %val0, %ret.0
-  %p1 = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off
-  %val1 = load i32, ptr %p1, align 4
-  %ret.2 = add nsw i32 %val1, %ret.1
-  %p2 = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off
-  %val2 = load i32, ptr %p2, align 4
-  %ret.3 = add nsw i32 %val2, %ret.2
-  %p3 = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off
-  %val3 = load i32, ptr %p3, align 4
-  %ret.4 = add nsw i32 %val3, %ret.3
-  %iv.next = add nuw nsw i64 %iv, 1
-  %iv_sub.next = add nuw i64 %iv_sub, -1
-  %off.next = add nuw nsw i64 %off, 1
-  %exitcond = icmp eq i64 %iv.next, %n
-  br i1 %exitcond, label %exit, label %for.body
-
-exit:
-  %p0.last = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off.next
-  store ptr %p0.last, ptr %p, align 8
-  %p1.last = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off.next
-  store ptr %p1.last, ptr %arrayidx1, align 8
-  %p2.last = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off.next
-  store ptr %p2.last, ptr %arrayidx2, align 8
-  %p3.last = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off.next
-  store ptr %p3.last, ptr %arrayidx3, align 8
-  %arrayidx4 = getelementptr inbounds nuw i64, ptr %p, i64 4
-  store i64 %iv_sub.next, ptr %arrayidx4, align 8
-  ret i32 %ret.4
-}


        


More information about the llvm-branch-commits mailing list